diff --git a/scripts/arkcc_asan.py b/scripts/arkcc_asan.py
new file mode 100755
index 0000000000000000000000000000000000000000..a886a2a138b58fa6e5e13b1984753ea9ee59665b
--- /dev/null
+++ b/scripts/arkcc_asan.py
@@ -0,0 +1,245 @@
+#!/usr/bin/python3
+"""Compiler driver that builds C/C++ sources with Maple and (optionally) ASAN.
+
+Each source file goes through clang (-emit-ast) -> hir2mpl -> maple
+(me:mpl2mpl:mplcg) -> Linaro gcc (assembler); the objects are then linked
+with the Linaro gcc.  The MAPLE_ROOT environment variable must be set.
+"""
+
+import os
+import shlex
+import shutil
+import subprocess
+import sys
+
+MAPLE_ROOT = os.environ['MAPLE_ROOT']
+LINARO = f"{MAPLE_ROOT}/tools/gcc-linaro-7.5.0"
+MAPLE_EXECUTE_BIN = f"{MAPLE_ROOT}/output/aarch64-clang-debug/bin"
+
+LINARO_GCC = f"{LINARO}/bin/aarch64-linux-gnu-gcc"
+SAVE_MAPLE_TEMPS = False
+ASAN = True
+ASAN_FLAG = "0x1"
+OPT_LEVEL = "-O0"
+
+ISYSTEM_FLAGS = (f"-isystem {LINARO}/aarch64-linux-gnu/libc/usr/include "
+                 f"-isystem {LINARO}/lib/gcc/aarch64-linux-gnu/7.5.0/include")
+always_include_flags = ISYSTEM_FLAGS
+
+# clang binary used to emit the AST.
+_CLANG = (f"{MAPLE_ROOT}/tools/"
+          "clang+llvm-15.0.4-x86_64-linux-gnu-ubuntu-18.04-enhanced/bin/clang")
+# Arguments with these suffixes are compiled through the Maple pipeline.
+_SOURCE_SUFFIXES = ('.c', '.cc', '.cpp', '.h')
+# gcc options whose value is the NEXT argument; that value must never be
+# mistaken for a source file (e.g. "-include config.h").
+_FLAGS_WITH_VALUE = frozenset((
+    '-include', '-imacros', '-isystem', '-iquote', '-idirafter',
+    '-I', '-L', '-D', '-U', '-MF', '-MT', '-MQ', '-x',
+))
+# Libraries appended to the gcc command line when ASAN is enabled.
+_ASAN_LINK_FLAGS = ('-lasan', '-lubsan', '-ldl', '-lpthread', '-lm', '-lrt')
+# Every tool invocation sends its output to these files in the working dir.
+_REDIRECT = "> stdout.txt 2> stderr.txt"
+
+
+def _quote_join(args):
+    """Join *args* into a single string that the shell splits back intact."""
+    return ' '.join(shlex.quote(arg) for arg in args)
+
+
+def run_cmd(cmd):
+    """Run *cmd* through the shell and raise RuntimeError if it fails.
+
+    An exception is raised instead of using ``assert`` so that failures are
+    still detected when Python runs with ``-O``.
+    """
+    ret = subprocess.run(cmd, shell=True).returncode
+    if ret != 0:
+        raise RuntimeError(f"Fail {cmd}\nExit with {ret}.")
+
+
+def src2ast(src, flags):
+    """Emit the clang AST of *src* as ``<src>.ast``."""
+    cmd = (f"{_CLANG} -target aarch64-linux-gnu -emit-ast {shlex.quote(src)} "
+           f"-o {shlex.quote(src + '.ast')} {flags} {always_include_flags} "
+           f"{_REDIRECT}")
+    run_cmd(cmd)
+
+
+def src2ass(src, flags):
+    """Compile *src* straight to ``<src>.s`` with the maple driver."""
+    cmd = (f"{MAPLE_EXECUTE_BIN}/maple --save-temps -S {shlex.quote(src)} "
+           f"-o {shlex.quote(src + '.s')} {flags} {always_include_flags} "
+           f"{_REDIRECT}")
+    run_cmd(cmd)
+
+
+def ast2mpl(src):
+    """Translate ``<src>.ast`` into ``<src>.mpl`` with hir2mpl."""
+    cmd = (f"{MAPLE_EXECUTE_BIN}/hir2mpl {shlex.quote(src + '.ast')} "
+           f"-o {shlex.quote(src + '.mpl')} {_REDIRECT}")
+    run_cmd(cmd)
+
+
+def mpl2ass(src):
+    """Lower ``<src>.mpl`` to ``<src>.s``, passing --san when ASAN is set."""
+    cmd = f"{MAPLE_EXECUTE_BIN}/maple"
+    if SAVE_MAPLE_TEMPS:
+        cmd += ' --save-temps'
+    if ASAN:
+        # NOTE: we set mplcg -O1 (both O2 and O0 can cause problem)
+        options = (f"{OPT_LEVEL} --san={ASAN_FLAG}:{OPT_LEVEL} --no-inline "
+                   "-quiet:-O1")
+    else:
+        options = f"{OPT_LEVEL}:{OPT_LEVEL} -quiet:{OPT_LEVEL}"
+    cmd += (f' --run=me:mpl2mpl:mplcg --option="{options}" '
+            f"{shlex.quote(src + '.mpl')} -o {shlex.quote(src + '.s')} "
+            f"{_REDIRECT}")
+    run_cmd(cmd)
+
+
+def ass2obj(src, dest):
+    """Assemble ``<src>.s`` into the object file *dest*."""
+    cmd = f"{LINARO_GCC} -c -o {shlex.quote(dest)} {shlex.quote(src + '.s')}"
+    run_cmd(cmd)
+
+
+def move_tmp_files(src, dest):
+    """Move (or delete) the intermediates of *src* once *dest* is built.
+
+    Nothing happens when *src* and *dest* live in the same directory.
+    Otherwise the intermediates are moved next to *dest* if SAVE_MAPLE_TEMPS
+    is set and deleted if it is not.
+    """
+    # the dest is the output dest file, get by reading the -o option's argument
+    dest_dir = os.path.dirname(dest) or '.'
+    src_dir = os.path.dirname(src) or '.'
+    if src_dir == dest_dir:
+        # work in the same dir, no need to mov
+        return
+    for tmp_ext in ('.ast', '.mpl', '.me.mpl', '.s'):
+        tmp_file = f'{src}{tmp_ext}'
+        if not os.path.isfile(tmp_file):
+            continue
+        if SAVE_MAPLE_TEMPS:
+            target = os.path.join(dest_dir, os.path.basename(tmp_file))
+            shutil.move(tmp_file, target)
+        else:
+            # they use too much disks.
+            os.remove(tmp_file)
+
+
+def add_asan_flags(flags):
+    """Return *flags* with every missing ASAN link library appended."""
+    tokens = shlex.split(flags)
+    missing = [flag for flag in _ASAN_LINK_FLAGS if flag not in tokens]
+    return _quote_join(tokens + missing)
+
+
+def raw_run_gcc(dest, flags):
+    """Invoke the Linaro gcc with *flags*, writing to *dest* when given."""
+    if ASAN:
+        flags = add_asan_flags(flags)
+    cmd = f"{LINARO_GCC} {flags}"
+    if dest:
+        # Without this guard an empty dest yields a dangling "-o".
+        cmd += f" -o {shlex.quote(dest)}"
+    run_cmd(cmd)
+
+
+def compile_only(src, dest, flags):
+    """Run the whole Maple pipeline on *src*, producing the object *dest*."""
+    src2ast(src, flags)
+    ast2mpl(src)
+    mpl2ass(src)
+    ass2obj(src, dest)
+    move_tmp_files(src, dest)
+
+
+def trim_suffix(src_file):
+    """Return *src_file* without its source suffix; ValueError if unknown."""
+    for suffix in _SOURCE_SUFFIXES:
+        if src_file.endswith(suffix):
+            return src_file[:-len(suffix)]
+    raise ValueError(f"{src_file} has unknown suffix!")
+
+
+def compile_and_link(is_compile, src_list, dest, flags):
+    """Compile every file of *src_list*, then link unless *is_compile*.
+
+    With *is_compile* each object is renamed to *dest* (single source) or
+    to ``<source stem>.o``; otherwise the objects are linked into *dest*,
+    which defaults to the stem of the first source.
+    """
+    if is_compile and dest and len(src_list) > 1:
+        raise ValueError("cannot specify -o with -c and multiple files")
+    # compile every src, then link
+    obj_list = []
+    for src in src_list:
+        tmp_dest = f'{src}.o'
+        compile_only(src, tmp_dest, flags)
+        obj_list.append(tmp_dest)
+        move_tmp_files(src, dest)
+
+    if is_compile:
+        for src, obj in zip(src_list, obj_list):
+            target = dest or trim_suffix(src) + '.o'
+            if target != obj:
+                shutil.move(obj, target)
+        return
+
+    if not dest:
+        dest = trim_suffix(src_list[0])
+    raw_run_gcc(dest, f"{_quote_join(obj_list)} {flags}")
+
+
+def analyze_arguments():
+    """Split ``sys.argv`` into (is_compile, sources, output, other flags).
+
+    The remaining flags are returned as one shell-quoted string, in their
+    original order.  Raises ValueError if ``-o`` has no file name.
+    """
+    is_compile = False
+    output_file = ''
+    source_files = []
+    other_flags = []
+    args = iter(sys.argv[1:])
+    for arg in args:
+        if arg == '-c':
+            is_compile = True
+        elif arg == '-o':
+            output_file = next(args, None)
+            if output_file is None:
+                raise ValueError("missing filename after '-o'")
+        elif arg in _FLAGS_WITH_VALUE:
+            # Forward the option together with its value, untouched.
+            other_flags.append(arg)
+            value = next(args, None)
+            if value is not None:
+                other_flags.append(value)
+        elif arg.endswith(_SOURCE_SUFFIXES):
+            source_files.append(arg)
+        else:
+            other_flags.append(arg)
+    return is_compile, source_files, output_file, _quote_join(other_flags)
+
+
+def main():
+    """Parse the command line and run the compile and/or link steps."""
+    is_compile, src_list, dest, flags = analyze_arguments()
+    if not src_list:
+        # No C/C++ source on the command line: hand everything to gcc and
+        # restore the "-c" that analyze_arguments() removed from the flags.
+        if is_compile:
+            flags = f"-c {flags}".strip()
+        raw_run_gcc(dest, flags)
+        return
+    compile_and_link(is_compile, src_list, dest, flags)
+
+
+if __name__ == '__main__':
+    try:
+        main()
+    except (RuntimeError, ValueError, OSError) as err:
+        sys.exit(str(err))
diff --git a/src/mapleall/maple_be/CMakeLists.txt b/src/mapleall/maple_be/CMakeLists.txt index 94d204c846bc3a2adddd14895f0af84fd02b70af..00a75f195994f7c119b083c2f3d06b8b9b88955e 100755 --- a/src/mapleall/maple_be/CMakeLists.txt +++ b/src/mapleall/maple_be/CMakeLists.txt @@ -40,7 +40,7 @@ endif() if(ENABLE_MAPLE_SAN) list(APPEND deps_maple - maple_san) + libmplsan) endif(ENABLE_MAPLE_SAN) 
diff --git a/src/mapleall/maple_san/.gitignore b/src/mapleall/maple_san/.gitignore new file mode 100644 index 0000000000000000000000000000000000000000..7309491b23038bfa0ef32eee087268946ab0ecee --- /dev/null +++ b/src/mapleall/maple_san/.gitignore @@ -0,0 +1,4 @@ +C +bzip2 +gzip +test diff --git a/src/mapleall/maple_san/CMakeLists.txt b/src/mapleall/maple_san/CMakeLists.txt index 89eb8801535b2f89634a7ac54171e3834c1c6281..a091f01c1f0f89555be78a822c5fb3eb6c4bcf4e 100644 --- a/src/mapleall/maple_san/CMakeLists.txt +++ b/src/mapleall/maple_san/CMakeLists.txt @@ -24,6 +24,10 @@ set(src_libmplsan src/ubsan_phases.cpp src/ubsan_bounds.cpp src/san_phase_manager.cpp + src/asan_neighbor_opt.cpp + src/asan_razor.cpp + src/asan_ud.cpp + src/asan_asap.cpp ) add_library(libmplsan SHARED ${src_libmplsan}) diff --git a/src/mapleall/maple_san/README.md b/src/mapleall/maple_san/README.md new file mode 100644 index 0000000000000000000000000000000000000000..750f54039fa7c00e3977df4adf29d7467925c088 --- /dev/null +++ b/src/mapleall/maple_san/README.md @@ -0,0 +1,66 @@ +# Address Sanitizer (ASAN) +This work is done by the Cybersecurity Group of HKUST (see the webpage of [Prof. Shuai Wang](https://www.cse.ust.hk/~shuaiw/))\ +This directory contains the code for instrumenting memory access operations on MAPLE IR. 
+ +Besides the original ASAN instrumentation, we also implement enhancements of ASAN (see the citations below) +that improve its efficiency.\ +Please consider citing the following papers: +``` +@inproceedings{wagner2015high, + title={High system-code security with low overhead}, + author={Wagner, Jonas and Kuznetsov, Volodymyr and Candea, George and Kinder, Johannes}, + booktitle={2015 IEEE Symposium on Security and Privacy}, + pages={866--879}, + year={2015}, + organization={IEEE} +} + +@inproceedings{zhang2021sanrazor, + title={$\{$SANRAZOR$\}$: Reducing redundant sanitizer checks in $\{$C/C++$\}$ programs}, + author={Zhang, Jiang and Wang, Shuai and Rigger, Manuel and He, Pinjia and Su, Zhendong}, + booktitle={15th USENIX Symposium on Operating Systems Design and Implementation (OSDI 21)}, + pages={479--494}, + year={2021} +} + +@inproceedings{zhang2022debloating, + title={Debloating address sanitizer}, + author={Zhang, Yuchen and Pang, Chengbin and Portokalidis, Georgios and Triandopoulos, Nikos and Xu, Jun}, + booktitle={31st USENIX Security Symposium (USENIX Security 22)}, + pages={4345--4363}, + year={2022} +} +``` + +To enable the ASAN functionality, please modify `$MAPLE_ROOT/src/mapleall/CMakeLists.txt`, and set +``` +OPTION(ENABLE_MAPLE_SAN "Enabling sanitizer functionalities" ON) # the default value is OFF +``` + +To use the ASAN options, please see the scripts [here](https://gitee.com/huaijinwang/OpenArkCompiler_ASAN_llvm-15/tree/asan-dev-llvm-15/scripts). + +# Address Sanitizer Runtime +The work was first developed with the LLVM ASAN runtime. When linking the executable, you can pass the +`-lasan` option to use the LLVM ASAN runtime.\ +When executing the binary, `libasan.so` must be available on the host machine and +should be preloaded through the `LD_PRELOAD=` environment variable. + +We also provide a BiSheng ASAN runtime. The source code is available at `$MAPLE_ROOT/src/mrt/libsan`. 
+Please see the `README.md` in that directory to learn how to build the runtime. After building, you +will see the files `libasan.a` and `libasan.so`.\ +To use it, you can statically link the binary by adding `libasan.a` to the `ld/gcc` command, or dynamically +link the binary with `libasan.so`. + +The SanRazor and ASAP enhancements require an additional runtime library. The source code is +available at `$MAPLE_ROOT/src/mrt/sanrazor`. Please build it and use it in a similar way to the +BiSheng ASAN runtime. + +# Contributors +This work was initially done by [Ms. Wei Chen](mailto:wchenbt@cse.ust.hk) for Ver. 1 of OpenArkCompiler. + +[Mr. Huaijin Wang](https://whj0401.github.io/) then ported it to the current version of OpenArkCompiler, and provided the +BiSheng ASAN runtime, the SanRazor runtime, and the framework for using the ASAP enhancement. + +[Dr. Zhibo Liu](https://monkbai.github.io/), Mr. Yanzuo Chen, and Mr. Wai Kin WONG implemented the neighbor memory +access optimization for ASAN, the RBTree runtime instrumentation, and the SanRazor enhancement, respectively. 
+ diff --git a/src/mapleall/maple_san/include/asan_asap.h b/src/mapleall/maple_san/include/asan_asap.h new file mode 100644 index 0000000000000000000000000000000000000000..85cf8b9a9742c629c00039d1a74cbf6dbb1f14d3 --- /dev/null +++ b/src/mapleall/maple_san/include/asan_asap.h @@ -0,0 +1,53 @@ +#ifdef ENABLE_MAPLE_SAN + +#ifndef ASAN_ASAP_H +#define ASAN_ASAP_H + +#include +#include +#include +#include "san_common.h" +#include "asan_razor.h" + +namespace maple { + +class ASAPStmtKey { +public: + StmtID stmtid; + std::string src_path; + ASAPStmtKey(StmtID stmtid, std::string src_path) : stmtid(stmtid), src_path(src_path){}; + bool operator<(const ASAPStmtKey &rhs) const { + if (this->stmtid == rhs.stmtid) { + return this->src_path < rhs.src_path; + } + return this->stmtid < rhs.stmtid; + } +}; + +enum ASAPMode{ + NUM, + COST +}; + +typedef std::pair ASAPStmtKeyCountPair; +extern std::vector OrderedStmtKeyCountPairs; +extern std::set ASAPAsanRelevantStmtKey; + +extern double ASAPThreshold; +extern std::set ASAPToRemoveAsanIDs; +extern std::map ASAPStmtKey2CostMap; + +// for each module, do it only once +void InitializeOrderedStmtKeyCountPairs(); +void LoadASAPThreshold(ASAPMode mode); +void DecideToBeRemoved(ASAPMode mode); +void SetThisFileASAPToRemoveStmtIDs(std::string filename); + +// call for each function +void ASAPRemoveAsanChecks(std::string filename, BlockNode* block); + +} // namespace maple + +#endif // ASAN_ASAP_H + +#endif // ENABLE_MAPLE_SAN diff --git a/src/mapleall/maple_san/include/asan_config.h b/src/mapleall/maple_san/include/asan_config.h new file mode 100644 index 0000000000000000000000000000000000000000..abde76f9aed2cc51f1e03ed3541ecfe282cc330a --- /dev/null +++ b/src/mapleall/maple_san/include/asan_config.h @@ -0,0 +1,25 @@ +#ifdef ENABLE_MAPLE_SAN + +#ifndef ASAN_CONFIG_H +#define ASAN_CONFIG_H + +#define ENABLE_ASAN_NEIGHBOR_OPT false +#define ASAN_DEBUG_MODE true + +#define ASAN_ASAP_DEFAULT_ASAN_CHECK_COST_THRESHOLD 0.75 +#define 
ASAN_ASAP_DEFAULT_ASAN_CHECK_NUM_THRESHOLD 0.4 + +#define ASAN_MODULE_NAME_MACRO "MAPLE_ASAN_MODULE_NAME" + +#define ASAN_RAZOR_TRACE_FUNC 2 +#define ASAN_RAZOR_IGNORE_IDMAP_FILE false +#define ASAN_RAZOR_LOG_MACRO "SANRAZOR_LOG" +#define ASAN_RAZOR_DEFAULT_LOG_FILE "sanrazor.log" +#define ASAN_RAZOR_IDMAP_DIR_MACRO "SANRAZOR_IDMAP_DIR" +#define ASAN_RAZOR_DEFAULT_IDMAP_DIR "/tmp/idmap" +#define ASAN_RAZOR_DEFAULT_DELIMITER " // " +#define ASAN_RAZOR_INSTRUMENT_ASAN_CHECKS_ONLY false + +#endif + +#endif // ENABLE_MAPLE_SAN diff --git a/src/mapleall/maple_san/include/asan_function.h b/src/mapleall/maple_san/include/asan_function.h index 2efd880188577b465c49f9ff4492415f61c40492..dee1c4b64bbe14ac04cf90c0b9428c15fab194c4 100644 --- a/src/mapleall/maple_san/include/asan_function.h +++ b/src/mapleall/maple_san/include/asan_function.h @@ -12,6 +12,8 @@ #include "me_ssa.h" #include "mir_module.h" #include "san_common.h" +#include "mpl_logging.h" +#include "asan_config.h" namespace maple { @@ -23,28 +25,41 @@ struct MemoryAccess { BaseNode *ptrOperand; }; +/* for NeighborOpt*/ +struct AsanNodePos { // node position + // there is no node id + // use this struct to locate the target node after [irmapbuild + premeemit] + SrcPosition stmtPos; // <- root stmt + std::vector directions; + Opcode op; + int64_t newSize; +}; + // Accesses sizes are powers of two: 1, 2, 4, 8, 16. class AddressSanitizer { public: - AddressSanitizer(MIRModule &module, PreAnalysis *symbolInteresting) + AddressSanitizer(MIRModule &module, PreAnalysis *symbolInteresting, std::vector targetPos) : module(&module), Mapping(getShadowMapping()), preAnalysis(symbolInteresting) { LongSize = kSizeOfPtr * 8; IntPtrPrim = LongSize == sizeof(int32) ? 
PTY_i32 : PTY_i64; IntPtrTy = GlobalTables::GetTypeTable().GetPrimType(IntPtrPrim); + skipPos = targetPos; } bool instrumentFunction(MeFunction &F); - void instrumentAddress(StmtNode *InsertBefore, BaseNode *Addr, uint64_t TypeSize, bool IsWrite, + AsanVirtualBlock doInstrumentAddress(StmtNode *InsertBefore, BaseNode *Addr, BlockNode* block, + size_t Alignment, size_t Granularity, uint64_t TypeSize, bool IsWrite); + AsanVirtualBlock instrumentAddress(StmtNode *InsertBefore, BaseNode *Addr, BlockNode* block, uint64_t TypeSize, bool IsWrite, BaseNode *SizeArgument); - void instrumentUnusualSizeOrAlignment(StmtNode *InsertBefore, BaseNode *Addr, uint64_t TypeSize, + AsanVirtualBlock instrumentUnusualSizeOrAlignment(StmtNode *InsertBefore, BaseNode *Addr, BlockNode* block, uint64_t TypeSize, bool IsWrite); private: friend class FunctionStackPoisoner; - void instrumentMop(StmtNode *I, std::vector &memoryAccess); + void instrumentMop(StmtNode *I, std::vector &memoryAccess, BlockNode* block); void initializeCallbacks(const MIRModule &mirModule); @@ -52,7 +67,7 @@ class AddressSanitizer { bool isInterestingAlloca(const UnaryNode &unaryNode); - void instrumentMemIntrinsic(IntrinsiccallNode *stmtNode); + void instrumentMemIntrinsic(IntrinsiccallNode *stmtNode, BlockNode* block); void maybeInsertDynamicShadowAtFunctionEntry(const MeFunction &F); @@ -74,9 +89,7 @@ class AddressSanitizer { BinaryNode *createSlowPathCmp(StmtNode *InsBefore, BaseNode *AddrLong, BaseNode *ShadowValue, uint64_t TypeSize); - void SanrazorProcess(MeFunction &mefunc, std::set &userchecks, - std::map> &brgoto_map, std::map &stmt_to_bbID, - std::map &stmt_id_to_stmt, std::vector &stmt_id_list, int check_env); + void SanrazorProcess(MeFunction &mefunc); struct FunctionStateRAII { AddressSanitizer *Phase; @@ -118,6 +131,23 @@ class AddressSanitizer { std::map ProcessedAllocas; PreAnalysis *preAnalysis; + + /* for NeighborOpt */ + bool neighborOptSwitch = ENABLE_ASAN_NEIGHBOR_OPT; + std::vector skipPos; + 
+ std::map> node2directions; + std::map node2stmt; + void prepareBlockNodeInfo(BlockNode* block, std::vector curDirections); + void prepareInfo(MeFunction &mefunc); + void prepareInfoInternal(StmtNode *stmt, BaseNode *baseNode, std::vector curDirections); + void DumpSkipPos(); + + // return -1 means the check should be removed + // return 0 means opt makes no change + // return >0 means the check size should be changed to a larger value + int64 checkNeighborOpt(BaseNode *node); + /* End for NeighborOpt */ }; } // namespace maple diff --git a/src/mapleall/maple_san/include/asan_interfaces.h b/src/mapleall/maple_san/include/asan_interfaces.h index de336098c70a9c029c20da611bffc97862a323ff..b3565068f7cea34f2eb8a92c259087c9ae5e2e1a 100644 --- a/src/mapleall/maple_san/include/asan_interfaces.h +++ b/src/mapleall/maple_san/include/asan_interfaces.h @@ -11,6 +11,7 @@ namespace maple { const char *const kAsanModuleCtorName = "asan.module_ctor"; const char *const kAsanModuleDtorName = "asan.module_dtor"; const char *const kAsanInitName = "__asan_init"; +const char *const kAsanCovRegisterName = "__san_cov_register"; const char *const kAsanHandleNoReturnName = "__asan_handle_no_return"; const char *const kAsanRegisterGlobalsName = "__asan_register_globals"; const char *const kAsanUnregisterGlobalsName = "__asan_unregister_globals"; @@ -19,6 +20,18 @@ const char *const kAsanReportErrorTemplate = "__asan_report_"; const char *const kAsanShadowMemoryDynamicAddress = "__asan_shadow_memory_dynamic_address"; const char *const kAsanAllocaPoison = "__asan_alloca_poison"; const char *const kAsanAllocasUnpoison = "__asan_allocas_unpoison"; +const char *const kAsanStackCheck = "__asan_check_stack"; +const char* const kAsanStackCheck2 = "__asan_check_stack2"; + +// Sanitizer Coverage +const char* const kSanRazorCov = "__san_cov_trace_pc"; +const char* const kSanRazorCov2 = "__san_cov_trace_pc2"; +const char* const kSanRazorCovDump = "__san_cov_dump"; +const char* const 
kSanRazorCovAppendInfo = "__san_cov_append_info"; + +// Sanitizer initializer and finisher caller +const char* const kAsanInitializer = "__cxx_global_var_init"; +const char* const kAsanFinisher = "__cxx_global_var_fini"; } // namespace maple #endif // MAPLE_SAN_INCLUDE__ASAN_MESSAGES_H diff --git a/src/mapleall/maple_san/include/asan_module.h b/src/mapleall/maple_san/include/asan_module.h index 45bd73626dd31071530a280f09ae321e84aac581..bef1154512a3c4e259710e8cb61351dbf09fff53 100644 --- a/src/mapleall/maple_san/include/asan_module.h +++ b/src/mapleall/maple_san/include/asan_module.h @@ -23,11 +23,27 @@ class ModuleAddressSanitizer { } bool instrumentModule(); + std::string GetModuleSymbolPostfix() const; + std::string GetSrcFilePath() const; + + MIRSymbol* GetSymbolFromName(std::string name); + void AddGlobalChar(std::string name, char value, bool isInit); + void AddGlobalInt(std::string name, int64_t value, bool isInit); + void AddGlobalUInt(std::string name, uint64_t value, bool isInit); + void AddGlobalString(std::string name, std::string value, bool isInit); + void AddGlobalUIntArray(std::string name, std::vector values, bool isInit); + void AppendStmtToGlobalInit(StmtNode *node); + void AddSymbol(MIRSymbol* sym); + MIRModule* GetMIRModule() const {return module;} + MIRFunction *GetOrInsertFunction(const char *name, MIRType *retType, std::vector argTypes); private: + MIRFunction* GetGlobalCtor() const; + MIRFunction* GetGlobalDtor() const; void initializeCallbacks(); void GetGlobalSymbolUsage(); + BaseNode* GetSanRazorAppendInfoNode(); bool InstrumentGlobals(BlockNode *ctorToBeInserted); bool ShouldInstrumentGlobal(MIRSymbol *var); @@ -45,7 +61,6 @@ class ModuleAddressSanitizer { } MIRModule *module; - PrimType IntPtrPrim; MIRType *IntPtrTy; ShadowMapping Mapping; diff --git a/src/mapleall/maple_san/include/asan_phases.h b/src/mapleall/maple_san/include/asan_phases.h index daa8dae15a5790f2e6ddadbb7f8df7d608cb90b4..9acf5aaa0a74e3342b2e630e56e208ae75d425ce 100644 
--- a/src/mapleall/maple_san/include/asan_phases.h +++ b/src/mapleall/maple_san/include/asan_phases.h @@ -4,6 +4,8 @@ #define MAPLE_SAN_INCLUDE_ASAN_PHASES_H #include +#include +#include "asan_function.h" #include "maple_phase.h" #include "san_common.h" #include "maple_phase.h" @@ -19,7 +21,46 @@ namespace maple { PreAnalysis* result = nullptr; MAPLE_FUNC_PHASE_DECLARE_END + MAPLE_FUNC_PHASE_DECLARE_BEGIN(MEDoNeighborOpt, MeFunction) + std::vector GetResult(); + private: + void GetAnalysisDependence(maple::AnalysisDep &aDep) const override; + + /* *Neighbor Checks Optimization* Begin */ + + // for each indirect memory aceess statement, get the def of its address expression + bool PreAnalysis(MeFunction &F); + + void NeighborOptEntry(BB *bb); + void TraverseNodes(StmtNode *stmt, BaseNode *baseNode, std::vector curDirections); + const StmtNode * findDefStmt(BaseNode *baseNode, const VersionSt &vst); + void filterRemovableMemAccesses(); + + AddrofNode * getAddrofNode(BaseNode * baseNode); + uint32 getOffset(BaseNode *baseNode); + uint32 getStructOffset(BaseNode * node, MIRPtrType *ptrType, FieldID fieldID); + uint32 getArrayOffset(BaseNode * node, MIRPtrType *ptrType, FieldID fieldID); + void DumpIntermediateResult(); + void DumpResult(); + static bool cmp(std::pair& a, std::pair& b) {return a.second < b.second;} + std::vector > sortOffset(); + + + std::map node2offset; + std::map node2size; + std::map node2def; + std::map> def2nodes; + + std::map> node2directions; + std::map node2stmt; + + std::vector> result; // pos , size; + + std::vector results; + MAPLE_FUNC_PHASE_DECLARE_END + MAPLE_FUNC_PHASE_DECLARE(MEDoAsan, MeFunction) + MAPLE_FUNC_PHASE_DECLARE(MEASanRazor, MeFunction) } // namespace maple diff --git a/src/mapleall/maple_san/include/asan_razor.h b/src/mapleall/maple_san/include/asan_razor.h new file mode 100644 index 0000000000000000000000000000000000000000..df284e1c19cfd25a28b785b3f9c6f3f81f43b0d5 --- /dev/null +++ 
b/src/mapleall/maple_san/include/asan_razor.h @@ -0,0 +1,174 @@ +#ifdef ENABLE_MAPLE_SAN + +#ifndef MAPLE_SAN_ASAN_RAZOR_H +#define MAPLE_SAN_ASAN_RAZOR_H + +#include "asan_config.h" +#include "asan_function.h" +#include "asan_mapping.h" +#include "asan_module.h" +#include "me_cfg.h" +#include "me_function.h" +#include "me_ssa.h" +#include "mir_module.h" +#include "mpl_logging.h" +#include "san_common.h" + +#include "asan_ud.h" + +namespace maple { + +typedef float AsanBlockCost; + +class ASanRazorStmtKey; +extern std::string SanRazorCounterName; +extern std::string SanRazorSizeName; +extern std::string SanRazorIsInitializedName; +extern std::string SanRazorFilenameName; +extern std::atomic SanRazorSize; +extern std::string SanRazorCheckIDMapFileName; +extern bool SanRazorIs2ndCompile; +// store from CallSiteID to AsanRazorCheckID +extern std::map SanRazorCheckIDMap; +extern std::map SanRazorStmtKeyCheckIDMap; +extern std::map SanRazorStmtKey2CostMap; +extern std::set AsanRelevanCheckIDs; +extern std::set SanRazorAsanRelevantStmtKey; +void InitializeSanRazorGlobalNames(ModuleAddressSanitizer& AsanModule); +void SetSanRazorGlobals(ModuleAddressSanitizer& AsanModule); +std::string GetSanRazorCheckIDMapDirPath(); +void DumpSanRazorCheckIDMap(std::string filename); +void AddASanRazorSignalHandlerRegister(ModuleAddressSanitizer& AsanModule); +void AddSanRazorAppendInfo(ModuleAddressSanitizer& AsanModule); +void LoadSanRazorCheckIDMap(std::string filename); +void LoadSanRazorStmtKeyCheckIDMap(std::string filename); +extern std::string SanRazorLineDelimiter; +extern std::map SanRazorStmtCountMap; +void LoadSanRazorLog(); +void LoadAllSanRazorStmtKeyCheckIDMaps(); +void SanRazorClearEverything(); + +class ASanRazorStmtKey { +public: + size_t callSiteId; + std::string src_path; + + ASanRazorStmtKey(){}; + ASanRazorStmtKey(size_t callSiteId, std::string src_path) : callSiteId(callSiteId), src_path(src_path){}; + + bool operator<(const ASanRazorStmtKey &rhs) const { + if 
(this->callSiteId == rhs.callSiteId) { + return this->src_path < rhs.src_path; + } + return this->callSiteId < rhs.callSiteId; + } + + bool operator>(const ASanRazorStmtKey &rhs) const { + if (this->callSiteId == rhs.callSiteId) { + return this->src_path > rhs.src_path; + } + return this->callSiteId > rhs.callSiteId; + } + + bool operator==(const ASanRazorStmtKey &rhs) const { + if (this->callSiteId == rhs.callSiteId) { + return this->src_path == rhs.src_path; + } + return false; + } + + void SetCallSiteID(const size_t callSiteId) { + this->callSiteId = callSiteId; + } + + void SetSrcPath(const std::string src_path) { + this->src_path = src_path; + } + + StmtID GetStmtID() const { + AsanRazorCheckID id = SanRazorCheckIDMap[this->callSiteId]; + return id >> 1; + } + + bool IsTrueBranch() const { + AsanRazorCheckID id = SanRazorCheckIDMap[this->callSiteId]; + return (id & 0x1) > 0; + } +}; + +class ASanRazor { + public: + ASanRazor(); + + bool InstrumentFunctionForProfiling(MeFunction &F, const AsanVirtualBlockList &AsanBlocks); + bool RemoveRedundantInstrumentation(MeFunction &F, const AsanVirtualBlockList &AsanBlocks); + void ClearSanRazorCoverageStmtNodes(); + + private: + MIRModule *module; + MeFunction *mefunc; + MIRFunction *mirfunc; + std::string srcpath; + UStrIdx srcpathidx; + std::map stmtID2node; + std::set asanStmtIDSet; + std::map labelIdx2LabelNode; + std::map labelIdx2BlockNode; + std::map stmtID2asanBlock; + std::vector asanCovStmts; // if SanRazorIs2ndCompile, remove them all + std::map stmtID2BlockNode; + + void SetFunc(MeFunction &F) { + this->module = &(F.GetMIRModule()); + this->mefunc = &F; + this->mirfunc = F.GetMirFunc(); + this->srcpath = AsanModulePtr->GetSrcFilePath(); + this->srcpathidx = GlobalTables::GetUStrTable().GetOrCreateStrIdxFromName(this->srcpath); + } + + void _GetLabelIdx2LabelNodeFromBlock(BlockNode *block); + std::map GetLabelIdx2LabelNode(); + std::map GetLabelIdx2BlockNode(); + void _GetStmtID2node(BlockNode* block); + 
std::map GetStmtID2node(); + AsanVirtualBlock* GetAsanVirtualBlockWithIDBlock(const AsanVirtualIDBlock &asanBlock); + std::set GetAsanStmtIDSet(const AsanVirtualBlockList &AsanBlocks); + void PreprocessFunctionForProfiling(MeFunction &F, const AsanVirtualBlockList &AsanBlocks) { + this->SetFunc(F); + this->GetStmtID2node(); + this->GetAsanStmtIDSet(AsanBlocks); + this->GetLabelIdx2LabelNode(); + } + + CallNode *GetCallAsanCovStmt(AsanRazorCheckID stmt_id); + CallNode* GetCallDumpAsanCovStmt(); + AsanRazorCheckID GetTrueBranchID(StmtNode* stmt) { + return (stmt->GetStmtID() << 1) | 0x1; + } + AsanRazorCheckID GetFalseBranchID(StmtNode* stmt) { + return (stmt->GetStmtID() << 1); + } + void InstrumentAsanCheckForProfiling(StmtNode *stmt, const AsanVirtualBlock &asanBlock, BlockNode *block); + void InstrumentIfStmtForProfiling(IfStmtNode* ifStmt); + void InstrumentWhileStmtForProfiling(WhileStmtNode* whileStmt, BlockNode* block); + void InstrumentDoloopForProfiling(DoloopNode* doloop, BlockNode* block); + void InstrumentCondGotoForProfiling(CondGotoNode* condGoto, BlockNode* block, bool isTrue); + void InstrumentForeachelemForProfiling(ForeachelemNode* foreachelem, BlockNode* block); + bool InstrumentBlockForProfiling(BlockNode *block); + + + UDProfile udchain; + std::set UserCheckStmtID; + std::set SanCheckStmtID; + std::set GetRemoveAsanID(); + san_struct Get_san_struct(StmtID id); + + + +}; + +} // namespace maple + +#endif // MAPLE_SAN_ASAN_RAZOR_H + +#endif // ENABLE_MAPLE_SAN \ No newline at end of file diff --git a/src/mapleall/maple_san/include/asan_stackvar.h b/src/mapleall/maple_san/include/asan_stackvar.h index 2b7c726fd13d88f98ffad26d16bf57ea1c13cf6d..f8756932d3ef19ab557d97b8f5af2ee8ec4e9188 100644 --- a/src/mapleall/maple_san/include/asan_stackvar.h +++ b/src/mapleall/maple_san/include/asan_stackvar.h @@ -36,20 +36,27 @@ class FunctionStackPoisoner { void replaceAllUsesWith(MIRSymbol *oldVar, MIRSymbol *newVar); - void 
handleDynamicAllocaCall(ASanDynaVariableDescription *AI); + BlockNode *replaceAllUsesOfBlockNode(MIRSymbol *oldVar, MIRSymbol *newVar, BlockNode *block); - MIRSymbol *createAllocaForLayout(StmtNode *insBefore, MIRBuilder *mirBuilder, const ASanStackFrameLayout &L); + void handleDynamicAllocaCall(ASanDynaVariableDescription *AI); - void unpoisonDynamicAllocasBeforeInst(StmtNode *InstBefore); + MIRSymbol *createAllocaForLayout(StmtNode *insBefore, MIRBuilder *mirBuilder, const ASanStackFrameLayout &L, + MIRSymbol** asan_tmp_ptr); - void copyToShadow(const std::vector ShadowMask, const std::vector ShadowBytes, MIRBuilder *mirBuilder, - BaseNode *ShadowBase, StmtNode *InsBefore); + void unpoisonDynamicAllocasBeforeInst(StmtBlockNodePair stmtBlockPair); - void copyToShadow(const std::vector ShadowMask, const std::vector ShadowBytes, size_t Begin, size_t End, + void copyToShadow(const std::vector ShadowMask, const std::vector ShadowBytes, MIRBuilder *mirBuilder, BaseNode *ShadowBase, StmtNode *InsBefore); - void copyToShadowInline(const std::vector ShadowMask, const std::vector ShadowBytes, size_t Begin, size_t End, - MIRBuilder *mirBuilder, BaseNode *ShadowBase, StmtNode *InsBefore); + void copyToShadow(const std::vector ShadowMask, const std::vector ShadowBytes, size_t Begin, + size_t End, MIRBuilder *mirBuilder, BaseNode *ShadowBase, StmtNode *InsBefore); + + void copyToShadowInline(const std::vector ShadowMask, const std::vector ShadowBytes, size_t Begin, + size_t End, MIRBuilder *mirBuilder, BaseNode *ShadowBase, StmtNode *InsBefore); + + void CleanStackShadow(const std::vector ShadowMask, MIRBuilder *mirBuilder, BaseNode *ShadowBase, + StmtNode *InsBefore); + void CleanStackShadowBytes(uint32 bytes, uint64 offset, MIRBuilder *mirBuilder, BaseNode* ShadowBase, StmtNode *InsBefore); bool isFuncCallArg(const MIRSymbol *const symbolPtr) const; bool isFuncCallArg(const std::string symbolName) const; @@ -69,7 +76,7 @@ class FunctionStackPoisoner { unsigned 
StackAlignment; MIRSymbol *DynamicAllocaLayout = nullptr; - std::vector RetVec; + std::vector RetVec; std::vector stackVariableDesc; std::vector dynamicAllocaDesc; std::set callArgSymbols; @@ -82,10 +89,32 @@ class FunctionStackPoisoner { bool HasReturnsTwiceCall = false; std::map isUsedInAlloca; + private: void collectLocalVariablesWithoutAlloca(); void collectLocalVariablesWithAlloca(); void collectDescFromUnaryStmtNode(UnaryStmtNode &assignNode); + + BaseNode* TransformAddrofNode(MIRSymbol *oldVar, MIRSymbol *newVar, AddrofNode *baseNode); + BaseNode* TransformDassignNode(MIRSymbol *oldVar, MIRSymbol *newVar, DassignNode *baseNode); + BaseNode* TransformDreadNode(MIRSymbol *oldVar, MIRSymbol *newVar, DreadNode *baseNode); + BaseNode* TransformDassignoffNode(MIRSymbol *oldVar, MIRSymbol *newVar, DassignoffNode *baseNode); + + // handle SSANode, to replace dynamic_cast + AddrofNode *getAddrofNode(BaseNode *baseNode) { + SSANode *ssaCheck = dynamic_cast(baseNode); + if (ssaCheck != nullptr) + return dynamic_cast(dynamic_cast(baseNode)->GetNoSSANode()); + else + return dynamic_cast(baseNode); + } + DreadNode *getDreadNode(BaseNode *baseNode) { + SSANode *ssaCheck = dynamic_cast(baseNode); + if (ssaCheck != nullptr) + return dynamic_cast(dynamic_cast(baseNode)->GetNoSSANode()); + else + return dynamic_cast(baseNode); + } }; } // namespace maple #endif // MAPLE_SAN_ASAN_STACKVAR_H diff --git a/src/mapleall/maple_san/include/asan_ud.h b/src/mapleall/maple_san/include/asan_ud.h new file mode 100644 index 0000000000000000000000000000000000000000..060984ea9968690ebb33df483b72a6b31e2533cc --- /dev/null +++ b/src/mapleall/maple_san/include/asan_ud.h @@ -0,0 +1,196 @@ +#ifdef ENABLE_MAPLE_SAN + +#ifndef MAPLE_SAN_ASAN_UD_H +#define MAPLE_SAN_ASAN_UD_H + +#include "asan_config.h" +#include "asan_function.h" +#include "asan_mapping.h" +#include "me_cfg.h" +#include "me_function.h" +#include "me_ssa.h" +#include "mir_module.h" +#include "mpl_logging.h" + +// Handle asan struct 
+ +#include "san_common.h" +#include "asan_interfaces.h" +#include "me_ir.h" +#include "mir_builder.h" +#include "string_utils.h" + + + +namespace maple { + +typedef uint8_t OpCodeID; +typedef int32_t RegID; +typedef uint32_t VarID; + +// Struct to contain the information for performing set check +// For elimination of san check +struct set_check { + std::vector opcode; // 1. opcode enum Opcode : uint8 + std::vector register_terminal; // 2. register_terminal -> cannot further expand + std::stack register_live; // 3. register_live -> for further expansion + std::vector var_terminal; // 4. var_terminal -> cannot further expand + std::stack var_live; // 5. var_live -> for further expansion + std::vector const_int64; // 6. const, we only track int64 or kConstInt + std::vector const_str; // 7. const str ,we only store the index + std::vector type_num; // 8. Type, but we will just track the fieldID for simplicity iread->GetFieldID() + std::vector dassignID; // 9. stmtID, we only track the stmtID for simplicity +}; + +struct san_struct { + StmtID stmtID; + AsanRazorCheckCount tot_ctr; + AsanRazorCheckCount false_ctr;//False + AsanRazorCheckCount true_ctr;//True +}; + +// Helper function +template +void print_stack(std::stack &st) { + if (st.empty()) return; + T x = st.top(); + LogInfo::MapleLogger() << x << ","; + st.pop(); + print_stack(st); + st.push(x); +} + +template +std::vector inverseLookup(const std::map& map, const V& value) { + std::vector keys; + // Iterate through the map + for (const auto& pair : map) { + if (pair.second == value) { + keys.push_back(pair.first); + break; + } + } + return keys; +} + +class UDProfile { + + public: + UDProfile(); + + void initME(MeFunction &mefunc, std::set asanStmtIDSet); + void GetUD(StmtNode &stmt); + void Dump(); + void GetMatched(); + set_check* GetSetCheck(StmtID stmtID); + bool sat_check(const set_check& a, const set_check& b); + bool dynamic_sat(const san_struct& a, const san_struct& b, bool SCSC); + + std::map 
dassignID2dep; + void CreateDassignIDMap(); + + bool UnmatchUserCheck(StmtID UcStmtID, std::set asanStmtIDSet); + // san_struct Get_san_struct(StmtID id); + + template + bool compareVectors(const std::vector& a, const std::vector& b) { + // I am not sure why the original implementation use + // sets to compare the equivalence of two vectors (peformance?) + // Anyway, I think we may not delete the following code now + // if (a.size() != b.size()) + // { + // return false; + // } + // std::sort(a.begin(), a.end()); + // std::sort(b.begin(), b.end()); + // return (a == b); + std::set set_a(a.begin(), a.end()); + std::set set_b(b.begin(), b.end()); + return (set_a == set_b); + } + + set_check commit(set_check old, set_check latest) { + old.opcode.insert(old.opcode.end(), latest.opcode.begin(), latest.opcode.end()); + old.register_terminal.insert(old.register_terminal.end(), latest.register_terminal.begin(), + latest.register_terminal.end()); + old.var_terminal.insert(old.var_terminal.end(), latest.var_terminal.begin(), latest.var_terminal.end()); + old.const_int64.insert(old.const_int64.end(), latest.const_int64.begin(), latest.const_int64.end()); + old.const_str.insert(old.const_str.end(), latest.const_str.begin(), latest.const_str.end()); + old.type_num.insert(old.type_num.end(), latest.type_num.begin(), latest.type_num.end()); + old.dassignID.insert(old.dassignID.end(), latest.dassignID.begin(), latest.dassignID.end()); + return old; + } + + static void set_check_print_dep(set_check dep) { + LogInfo::MapleLogger() << "\nTerm Var: "; + for (auto var_tmp : dep.var_terminal) { + LogInfo::MapleLogger() << int(var_tmp) << ","; + } + // LogInfo::MapleLogger() << "\nConst: "; + // for (auto var_tmp : dep.const_int64) { + // LogInfo::MapleLogger() << var_tmp.GetRawData() << ","; + // } + LogInfo::MapleLogger() << "\n"; + } + + private: + + MeFunction *mefunc; + + std::set reg_order; + std::map> reg_to_stmt; + + std::set var_order; + std::map> var_to_stmt; + + std::map 
StmtID2Check; + + // The std map from asan razor + std::set asanStmtIDSet; + + // UD stuff + /* We would like to collect the use and define info + by traversing the IR with sth 'above' it. + */ + void recursion(BaseNode *stmt, std::vector &stmt_reg, + std::vector &stmt_const); + StmtNode *retLatest_Regassignment(StmtNode *stmt, int32 register_number); + StmtNode *retLatest_Varassignment(StmtNode *stmt, uint32 var_number); + void dep_iassign_expansion(IassignNode *iassign, set_check &dep); + void dep_constval_expansion(ConstvalNode *constValNode, set_check &dep); + void dep_dassign_expansion(DassignNode *dassign, set_check &dep); + void gen_register_dep(StmtNode *stmt, set_check &br_tmp); + void dep_expansion(BaseNode *stmt, set_check &dep); + + + bool IsCallNode(Opcode opcode) { + // Check if it is a callnode statement + switch (opcode) { + case OP_call: + case OP_virtualcall: + case OP_virtualicall: + case OP_superclasscall: + case OP_interfacecall: + case OP_interfaceicall: + case OP_customcall: + case OP_polymorphiccall: + case OP_callassigned: + case OP_virtualcallassigned: + case OP_virtualicallassigned: + case OP_superclasscallassigned: + case OP_interfacecallassigned: + case OP_interfaceicallassigned: + case OP_customcallassigned: + case OP_polymorphiccallassigned: + return true; + default: + return false; + } + } +}; + +} + +#endif // MAPLE_SAN_ASAN_UD_H + +#endif // ENABLE_MAPLE_SAN \ No newline at end of file diff --git a/src/mapleall/maple_san/include/san_common.h b/src/mapleall/maple_san/include/san_common.h index 425a8a08719f0af1e3467ebcef0edcbc7fb96342..48076dd9d857357d7d0abaceb7374bdb2f18bd84 100644 --- a/src/mapleall/maple_san/include/san_common.h +++ b/src/mapleall/maple_san/include/san_common.h @@ -9,6 +9,7 @@ #include "mir_module.h" #include "mir_nodes.h" #include "types_def.h" +#include namespace maple { @@ -22,6 +23,9 @@ constexpr uint8 kSizeOfPtr = 4; #error "Unsupported target" #endif +typedef std::pair StmtBlockNodePair; +std::vector 
getAllOrderedStmtNodeList(BlockNode* block); + struct ASanStackVariableDescription { std::string Name; // Name of the variable that will be displayed by asan size_t Size; // Size of the variable in bytes. @@ -56,26 +60,6 @@ struct ASanStackFrameLayout { size_t FrameSize; // Size of the frame in bytes. }; -// Struct to contain the information for performing set check -// For elimination of san check -struct set_check { - std::vector opcode; // 1. opcode enum Opcode : uint8 - std::vector register_terminal; // 2. register_terminal -> cannot further expand - std::stack register_live; // 3. register_live -> for further expansion - std::vector var_terminal; // 4. var_terminal -> cannot further expand - std::stack var_live; // 5. var_live -> for further expansion - std::vector const_int64; // 6. const, we only track int64 or kConstInt - std::vector const_str; // 7. const str ,we only store the index - std::vector type_num; // 8. Type, but we will just track the fieldID for simplicity iread->GetFieldID() -}; - -struct san_struct { - int stmtID; - int tot_ctr; - int l_ctr; - int r_ctr; -}; - class PreAnalysis : public AnalysisResult { public: PreAnalysis(MemPool &memPoolParam) : AnalysisResult(&memPoolParam){}; @@ -85,6 +69,66 @@ class PreAnalysis : public AnalysisResult { std::vector usedInAddrof; }; +typedef uint64_t AsanRazorCheckID; +typedef uint64_t AsanRazorCheckCount; +typedef uint32_t StmtID; + +struct AsanVirtualBlock { + StmtNode* first; + StmtNode* last; + // for sanrazor, call coverage interface before crash + // sometimes, a check has multiple crash branches, + // we need to insert sanrazor interface for all of them + std::vector crashes; + StmtNode* normal; // for sanrazor, normal branch is just before normal +}; + +// this is the id version for use between phases +struct AsanVirtualIDBlock { + uint32 first; + uint32 last; + // for sanrazor, call coverage interface before crash + // sometimes, a check has multiple crash branches, + // we need to insert 
sanrazor interface for all of them + std::vector crashes; + uint32 normal; // for sanrazor, normal branch is just before normal +}; + +class AsanVirtualBlockList { +public: + MapleVector* blocks; + MapleVector* idblocks; + + AsanVirtualBlockList(MIRModule &m) { + blocks = m.GetMemPool()->New>(m.GetMPAllocator().Adapter()); + idblocks = m.GetMemPool()->New>(m.GetMPAllocator().Adapter()); + } + ~AsanVirtualBlockList() { + blocks->clear(); + idblocks->clear(); + } + inline AsanVirtualIDBlock GetAsanIDBlock(const AsanVirtualBlock &block) { + AsanVirtualIDBlock ret; + ret.first = block.first->GetStmtID(); + ret.last = block.last->GetStmtID(); + ret.normal = block.normal->GetStmtID(); + for (auto crash : block.crashes) { + ret.crashes.push_back(crash->GetStmtID()); + } + return ret; + } + void push_back(const AsanVirtualBlock &block) { + blocks->push_back(block); + idblocks->push_back(GetAsanIDBlock(block)); + } +}; + +// Initialize the list pointer while processing +extern AsanVirtualBlockList* AsanBlockListPtr; + +class ModuleAddressSanitizer; +extern ModuleAddressSanitizer* AsanModulePtr; + static const size_t kMinAlignment = 16; static const unsigned kAllocaRzSize = 32; static const size_t kNumberOfAccessSizes = 5; @@ -136,28 +180,10 @@ MIRSymbol *getOrCreateSymbol(MIRBuilder *mirBuilder, const TyIdx tyIdx, const st ASanStackFrameLayout ComputeASanStackFrameLayout(std::vector &Vars, size_t Granularity, size_t MinHeaderSize); + +CallNode* CreateStackCheck(MIRBuilder *mirBuilder, BaseNode *baseNode, BaseNode *sizeNode); // Start of Sanrazor int SANRAZOR_MODE(); -CallNode *retCallCOV(const MeFunction &func, int bb_id, int stmt_id, int br_true, int type_of_check); -void recursion(BaseNode *stmt, std::vector &stmt_reg); -bool isReg_redefined(BaseNode *stmt, std::vector &stmt_reg); -bool isVar_redefined(BaseNode *stmt, std::vector &stmt_reg); -void dep_expansion(BaseNode *stmt, set_check &dep, std::map> reg_to_stmt, - std::map> var_to_stmt, const MeFunction &func); -void 
print_dep(set_check dep); -template -void print_stack(std::stack &st); -template -bool compareVectors(const std::vector& a, const std::vector& b); -int getIndex(std::vector v, StmtNode *K); -StmtNode *retLatest_Regassignment(StmtNode *stmt, int32 register_number); -StmtNode *retLatest_Varassignment(StmtNode *stmt, uint32 var_number); -set_check commit(set_check old, set_check latest); -void gen_register_dep(StmtNode *stmt, set_check &br_tmp, std::map> reg_to_stmt, - std::map> var_to_stmt, const MeFunction& func); -bool sat_check(const set_check& a, const set_check& b); -std::map gen_dynmatch(std::string file_name); -bool dynamic_sat(const san_struct& a, const san_struct& b, bool SCSC); } // end namespace maple #endif // MAPLE_SAN_INCLUDE_SAN_COMMON_H diff --git a/src/mapleall/maple_san/include/san_phase_manager.h b/src/mapleall/maple_san/include/san_phase_manager.h index 1ac31e75974e578d57e174a3fbb43b3d44b5ba82..942b301bcd83079f550e64b703da6f9843add2d2 100644 --- a/src/mapleall/maple_san/include/san_phase_manager.h +++ b/src/mapleall/maple_san/include/san_phase_manager.h @@ -19,7 +19,9 @@ public: std::string PhaseName() const override; ~MEModuleDoAsan() override {} private: + bool SanRazorEnabled() const; bool FuncLevelRun(MeFunction &meFunc, AnalysisDataManager &serialADM); + bool FuncLevelSanRazorRun(MeFunction &meFunc, AnalysisDataManager &serialADM); void GetAnalysisDependence(AnalysisDep &aDep) const override; void DoPhasesPopulate(const MIRModule &mirModule); diff --git a/src/mapleall/maple_san/src/asan_asap.cpp b/src/mapleall/maple_san/src/asan_asap.cpp new file mode 100644 index 0000000000000000000000000000000000000000..ff6604532cba70269e4adb3c8bc9d0303482193f --- /dev/null +++ b/src/mapleall/maple_san/src/asan_asap.cpp @@ -0,0 +1,225 @@ +#ifdef ENABLE_MAPLE_SAN + +#include "asan_config.h" +#include "asan_asap.h" +#include "san_common.h" +#include "asan_razor.h" + +namespace maple { + +double ASAPThreshold; +std::vector OrderedStmtKeyCountPairs; +std::set 
ASAPAsanRelevantStmtKey; +std::map ASAPStmtKey2CostMap; +std::set ASAPToRemoveAsanIDs; +std::set ASAPToRemoveStmtIDs; + +void InitializeOrderedStmtKeyCountPairs() { + OrderedStmtKeyCountPairs.clear(); + std::map tmp; + for (auto &pair : SanRazorStmtCountMap) { + ASAPStmtKey key(pair.first.GetStmtID(), pair.first.src_path); + if (tmp.find(key) != tmp.end()) { + tmp[key] += pair.second; + } else { + tmp[key] = pair.second; + } + if (SanRazorAsanRelevantStmtKey.find(pair.first) != SanRazorAsanRelevantStmtKey.end()) { + ASAPAsanRelevantStmtKey.insert(key); + ASAPStmtKey2CostMap[key] = 1.0; + } else { + ASAPStmtKey2CostMap[key] += 1.0; + } + } + for (auto &pair : tmp) { + OrderedStmtKeyCountPairs.push_back(pair); + } + // sort in reverse order + std::sort(OrderedStmtKeyCountPairs.begin(), OrderedStmtKeyCountPairs.end(), + [](const ASAPStmtKeyCountPair &a, const ASAPStmtKeyCountPair &b) { return a.second > b.second; }); +} + +void LoadASAPThresholdNumMode() { + char *threshold = std::getenv("ASAP_THRESHOLD"); + if (threshold == nullptr) { + ASAPThreshold = ASAN_ASAP_DEFAULT_ASAN_CHECK_NUM_THRESHOLD; + } else { + ASAPThreshold = std::atof(threshold); + } +} + +void LoadASAPThresholdCostMode() { + char *threshold = std::getenv("ASAP_THRESHOLD"); + if (threshold == nullptr) { + ASAPThreshold = ASAN_ASAP_DEFAULT_ASAN_CHECK_COST_THRESHOLD; + } else { + ASAPThreshold = std::atof(threshold); + } +} + +void LoadASAPThreshold(ASAPMode mode) { + switch (mode) { + case ASAPMode::NUM: + LoadASAPThresholdNumMode(); + break; + case ASAPMode::COST: + LoadASAPThresholdCostMode(); + break; + default: + LogInfo::MapleLogger() << "Invalid ASAP mode\n"; + break; + } +} + +void DecideToBeRemovedNumMode() { + // Note this function removes the asan checks of the whole program, + // rather than a single source file + size_t num_asan_checks = 0; + for (auto &pair : OrderedStmtKeyCountPairs) { + if (ASAPAsanRelevantStmtKey.find(pair.first) == ASAPAsanRelevantStmtKey.end()) { + continue; + } + 
num_asan_checks++; + } + size_t ASAPThreshold_abs = (size_t)(num_asan_checks * ASAPThreshold); + if (ASAN_DEBUG_MODE) { + LogInfo::MapleLogger() << "To remove following checks:\n" << std::hex; + } + size_t del_sum = 0; + size_t idx = 0; + while (del_sum <= ASAPThreshold_abs && idx < OrderedStmtKeyCountPairs.size()) { + auto& pair = OrderedStmtKeyCountPairs[idx]; + // Here we merely consider the asan relevant stmts + if (ASAPAsanRelevantStmtKey.find(pair.first) == ASAPAsanRelevantStmtKey.end()) { + idx++; + continue; + } + del_sum++; + idx++; + ASAPToRemoveAsanIDs.insert(pair.first); + if (ASAN_DEBUG_MODE) { + LogInfo::MapleLogger() << pair.first.src_path << " " << pair.first.stmtid << " " << pair.second << "\n"; + } + } + if (ASAN_DEBUG_MODE) { + LogInfo::MapleLogger() << std::dec << "To delete " << ASAPToRemoveAsanIDs.size() << "(" << \ + ((double)ASAPToRemoveAsanIDs.size() / (double)num_asan_checks) << ")" << \ + " asan checks\n"; + } +} + +void DecideToBeRemovedCostMode() { + // Note this function removes the asan checks of the whole program, + // rather than a single source file + long double total = 0.0; + size_t num_asan_checks = 0; + for (auto &pair : OrderedStmtKeyCountPairs) { + if (ASAPAsanRelevantStmtKey.find(pair.first) == ASAPAsanRelevantStmtKey.end()) { + continue; + } + num_asan_checks++; + if (ASAPStmtKey2CostMap.find(pair.first) == ASAPStmtKey2CostMap.end()) { + total += pair.second; + } else { + total += pair.second * ASAPStmtKey2CostMap[pair.first]; + } + } + size_t idx = 0; + long double del_sum = 0.0; + double ASAPThreshold_abs = total * ASAPThreshold; + if (ASAN_DEBUG_MODE) { + LogInfo::MapleLogger() << "To remove following checks:\n" << std::hex; + } + while (del_sum < ASAPThreshold_abs && idx < OrderedStmtKeyCountPairs.size()) { + auto& pair = OrderedStmtKeyCountPairs[idx]; + // Here we merely consider the asan relevant stmts + if (ASAPAsanRelevantStmtKey.find(pair.first) == ASAPAsanRelevantStmtKey.end()) { + idx++; + continue; + } + if 
(ASAPStmtKey2CostMap.find(pair.first) == ASAPStmtKey2CostMap.end()) { + del_sum += pair.second; + } else { + del_sum += pair.second * ASAPStmtKey2CostMap[pair.first]; + } + idx++; + ASAPToRemoveAsanIDs.insert(pair.first); + if (ASAN_DEBUG_MODE) { + LogInfo::MapleLogger() << pair.first.src_path << " " << pair.first.stmtid << " " << pair.second << "\n"; + } + } + if (ASAN_DEBUG_MODE) { + LogInfo::MapleLogger() << std::dec << "To delete " << ASAPToRemoveAsanIDs.size() << "(" << \ + ((double)ASAPToRemoveAsanIDs.size() / (double)num_asan_checks) << ")" << \ + " asan checks\n"; + } +} + +void DecideToBeRemoved(ASAPMode mode) { + switch (mode) { + case ASAPMode::NUM: + DecideToBeRemovedNumMode(); + break; + case ASAPMode::COST: + DecideToBeRemovedCostMode(); + break; + default: + LogInfo::MapleLogger() << "Invalid ASAP mode\n"; + break; + } +} + +void SetThisFileASAPToRemoveStmtIDs(std::string filename) { + for (auto &key : ASAPToRemoveAsanIDs) { + if (key.src_path == filename) { + ASAPToRemoveStmtIDs.insert(key.stmtid); + } + } + if (ASAN_DEBUG_MODE) { + LogInfo::MapleLogger() << "ASAP will remove " << std::dec << \ + ASAPToRemoveStmtIDs.size() << " asan checks of " << filename << "\n"; + } +} + +void ASAPRemoveAsanChecks(std::string filename, BlockNode* block) { + std::vector stmtBlockList = getAllOrderedStmtNodeList(block); + if (ASAN_DEBUG_MODE) { + // block->Dump(0); + LogInfo::MapleLogger() << "Before ASAP " << std::dec << \ + stmtBlockList.size() << " StmtNodes\n"; + } + std::set toRemoveStmtNodeSet; + size_t num_asan_blocks = 0; + for (auto &asanBlock : *AsanBlockListPtr->blocks) { + StmtID first_id = asanBlock.first->GetStmtID(); + if (ASAPToRemoveStmtIDs.find(first_id) != ASAPToRemoveStmtIDs.end()) { + num_asan_blocks++; + StmtNode* cur = asanBlock.first; + while (cur != nullptr && cur != asanBlock.last) { + toRemoveStmtNodeSet.insert(cur); + cur = cur->GetNext(); + } + toRemoveStmtNodeSet.insert(asanBlock.last); + } + } + if (ASAN_DEBUG_MODE) { + 
LogInfo::MapleLogger() << "ASAP removes " << std::dec << \ + num_asan_blocks << " asan blocks " << \ + toRemoveStmtNodeSet.size() << " StmtNodes\n"; + } + for (auto &pair : stmtBlockList) { + if (toRemoveStmtNodeSet.find(pair.first) != toRemoveStmtNodeSet.end()) { + pair.second->RemoveStmt(pair.first); + } + } + if (ASAN_DEBUG_MODE) { + stmtBlockList = getAllOrderedStmtNodeList(block); + LogInfo::MapleLogger() << "After ASAP " << std::dec << \ + stmtBlockList.size() << " StmtNodes\n"; + // block->Dump(0); + } +} + +} // namespace maple + +#endif // ENABLE_MAPLE_SAN \ No newline at end of file diff --git a/src/mapleall/maple_san/src/asan_function.cpp b/src/mapleall/maple_san/src/asan_function.cpp index ffbbf0be66c15e7abb522dd668a01f6385f47a8b..0192dd34d91236b3ae4badfb17e1e97cbf16ee63 100644 --- a/src/mapleall/maple_san/src/asan_function.cpp +++ b/src/mapleall/maple_san/src/asan_function.cpp @@ -21,18 +21,6 @@ bool isBlacklist(int k) { return (k == 120 || k == 125); } -void doInstrumentAddress(AddressSanitizer *Phase, StmtNode *InsertBefore, BaseNode *Addr, - size_t Alignment, size_t Granularity, uint64_t TypeSize, bool IsWrite) { - // Instrument a 1-, 2-, 4-, 8-, or 16- byte access with one check - // if the data is properly aligned. 
- if ((TypeSize == 8 || TypeSize == 16 || TypeSize == 32 || TypeSize == 64 || TypeSize == 128) && - (Alignment >= Granularity || Alignment == 0 || Alignment >= TypeSize / 8)) { - Phase->instrumentAddress(InsertBefore, Addr, TypeSize, IsWrite, nullptr); - } else { - Phase->instrumentUnusualSizeOrAlignment(InsertBefore, Addr, TypeSize, IsWrite); - } -} - void dumpFunc(MeFunction &mefunc) { StmtNodes &stmtNodes = mefunc.GetMirFunc()->GetBody()->GetStmtNodes(); for (StmtNode &stmt : stmtNodes) { @@ -41,6 +29,11 @@ void dumpFunc(MeFunction &mefunc) { } bool AddressSanitizer::instrumentFunction(MeFunction &mefunc) { + if (neighborOptSwitch){ + prepareInfo(mefunc); + DumpSkipPos(); + } + MIRBuilder *builder = mefunc.GetMIRModule().GetMIRBuilder(); this->func = &mefunc; if (mefunc.GetMirFunc()->GetAttr(FUNCATTR_extern)) { @@ -61,53 +54,44 @@ bool AddressSanitizer::instrumentFunction(MeFunction &mefunc) { maybeInsertDynamicShadowAtFunctionEntry(mefunc); - std::vector toInstrument; - std::vector noReturnCalls; - - // Definition for sanrazor - - // Destination map back to initiate stmt - std::map> brgoto_map; - // Map the stmt by using label ID to the basic block ID, for verification in the coverage - std::map stmt_to_bbID; + std::vector toInstrument = getAllOrderedStmtNodeList(mefunc.GetMirFunc()->GetBody()); + std::vector noReturnCalls; - // Distinguishing user checks or sanitzer checks - std::set userchecks; - - std::map stmt_id_to_stmt; - - for (StmtNode &stmt : mefunc.GetMirFunc()->GetBody()->GetStmtNodes()) { - toInstrument.push_back(&stmt); - if (CallNode *callNode = dynamic_cast(&stmt)) { + for (auto stmtBlockPair : toInstrument) { + StmtNode* stmtptr = stmtBlockPair.first; + CallNode *callNode = dynamic_cast(stmtptr); + if (callNode != nullptr) { MIRFunction *calleeFunc = GlobalTables::GetFunctionTable().GetFunctionFromPuidx(callNode->GetPUIdx()); if (calleeFunc->NeverReturns() || calleeFunc->GetName() == "exit") { - noReturnCalls.push_back(callNode); + 
noReturnCalls.push_back(stmtBlockPair); } } - if (stmt.GetOpCode() == OP_brtrue || stmt.GetOpCode() == OP_brfalse) { - userchecks.insert(&stmt); - } } int numInstrumented = 0; - - for (auto stmt : toInstrument) { - std::vector memAccVec = isInterestingMemoryAccess(stmt); + // The code below checks and instruments all indirect memory access + for (auto stmtBlockPair : toInstrument) { + StmtNode* stmtptr = stmtBlockPair.first; + BlockNode* blockptr = stmtBlockPair.second; + std::vector memAccVec = isInterestingMemoryAccess(stmtptr); if (memAccVec.size() > 0) { - instrumentMop(stmt, memAccVec); + instrumentMop(stmtptr, memAccVec, blockptr); } else { - instrumentMemIntrinsic(dynamic_cast(stmt)); + instrumentMemIntrinsic(dynamic_cast(stmtptr), blockptr); } numInstrumented++; } FunctionStackPoisoner fsp(mefunc, *this); + // variables are replaced with pointers after fsp bool changedStack = fsp.runOnFunction(); - for (auto stmt : noReturnCalls) { + for (auto stmtBlockPair : noReturnCalls) { MapleVector args(builder->GetCurrentFuncCodeMpAllocator()->Adapter()); CallNode *callNode = builder->CreateStmtCall(AsanHandleNoReturnFunc->GetPuidx(), args); - callNode->InsertAfterThis(*stmt); + StmtNode* stmtptr = stmtBlockPair.first; + BlockNode* blockptr = stmtBlockPair.second; + blockptr->InsertBefore(stmtptr, callNode); } int check_env = SANRAZOR_MODE(); @@ -115,7 +99,7 @@ bool AddressSanitizer::instrumentFunction(MeFunction &mefunc) { bool doSanrazor = (check_env > 0) && (numInstrumented > 0 || changedStack || !noReturnCalls.empty()); if (doSanrazor) { functionModified = true; - SanrazorProcess(mefunc, userchecks, brgoto_map, stmt_to_bbID, stmt_id_to_stmt, stmt_id_list, check_env); + SanrazorProcess(mefunc); } // dump IRs of each block // dumpFunc(mefunc); @@ -124,440 +108,15 @@ bool AddressSanitizer::instrumentFunction(MeFunction &mefunc) { return functionModified; } -void AddressSanitizer::SanrazorProcess(MeFunction &mefunc, std::set &userchecks, - std::map> &brgoto_map, - 
std::map &stmt_to_bbID, - std::map &stmt_id_to_stmt, std::vector &stmt_id_list, - int check_env) { - MIRBuilder *builder = mefunc.GetMIRModule().GetMIRBuilder(); +void AddressSanitizer::SanrazorProcess(MeFunction &mefunc) { + // MIRBuilder *builder = mefunc.GetMIRModule().GetMIRBuilder(); LogInfo::MapleLogger() << "****************SANRAZOR instrumenting****************" << "\n"; - MIRType *voidType = GlobalTables::GetTypeTable().GetVoid(); - // type 0 is user check, type 1 is sanitzer check - int type_of_check = 0; - // if br_true set to 1, else set to 0 - int br_true = 0; - // we check one step, than instrument_flag set to false - bool instrument_flag = false; - // info to plugin the shared lib - int bb_id = 0; - int stmt_id = 0; - MeCFG *cfg = mefunc.GetCfg(); - - std::set reg_order; - std::map> reg_to_stmt; - - std::set var_order; - std::map> var_to_stmt; - - std::vector san_set_check; - std::vector san_set_check_ID; - - std::vector user_set_check; - std::vector user_set_check_ID; - - std::vector stmt_to_remove; - std::vector call_stmt_to_remove; - std::vector> stmt_to_cleanup; - - for (BB *bb : cfg->GetAllBBs()) { - if (bb) { - for (StmtNode &stmt : bb->GetStmtNodes()) { - std::vector stmt_reg; - // OP_regassign -> = - if (stmt.GetOpCode() == OP_regassign) { - std::vector reg_redef_check_vec; - RegassignNode *regAssign = static_cast(&stmt); - if (reg_to_stmt.count(regAssign->GetRegIdx()) == 0) { - reg_order.insert(regAssign->GetRegIdx()); - } - reg_to_stmt[regAssign->GetRegIdx()].push_back(&stmt); - } else if (stmt.GetOpCode() == OP_dassign || stmt.GetOpCode() == OP_maydassign) { - std::vector var_redef_check_vec; - DassignNode *dassign = static_cast(&stmt); - // uint32 - if (var_to_stmt.count(dassign->GetStIdx().Idx()) == 0) { - var_order.insert(dassign->GetStIdx().Idx()); - } - var_to_stmt[dassign->GetStIdx().Idx()].push_back(&stmt); - } - // Unsupported OPCODE: - // 1. 
iassignoff (, ) - // 2023-02-07: I added iassignoff as interestedMemoryAccess, the address is - // calculated by ` + offset`. Hence, the instrumented code is - // simply the same as iassign - // 2. callassigned - // 2023-02-07: I added callassigned to transform the returned variables' names - // there are dassign OpCodes inside callassigned instruction - else if (stmt.GetOpCode() == OP_callassigned) { - /* - callassigned (, ..., ) { - dassign - dassign - ... - dassign } - */ - // We currently skip it, the retVar_XXX variable should not be instrumented - } else if (stmt.GetOpCode() == OP_iassign) { - // syntax: iassign (, ) - // %addr-expr = - BaseNode *addr_expr = stmt.Opnd(0); - // addr_expr have 3 cases - // iread u64 <* <$_TY_IDX111>> 22 (regread ptr %177) - if (addr_expr->GetOpCode() == OP_iread) { - std::vector dump_reg; - recursion(addr_expr, dump_reg); - for (int32 reg_tmp : dump_reg) { - if (reg_to_stmt.count(reg_tmp) == 0) { - reg_order.insert(reg_tmp); - } - reg_to_stmt[reg_tmp].push_back(&stmt); - } - } else if (addr_expr->GetOpCode() == OP_regread) { - // regread ptr %14 - RegreadNode *regread = static_cast(addr_expr); - if (reg_to_stmt.count(regread->GetRegIdx()) == 0) { - reg_order.insert(regread->GetRegIdx()); - } - reg_to_stmt[regread->GetRegIdx()].push_back(&stmt); - } else if (addr_expr->GetOpCode() == OP_dread) { - // dread i64 %asan_shadowBase - DreadNode *dread = static_cast(addr_expr); - if (var_to_stmt.count(dread->GetStIdx().Idx()) == 0) { - var_order.insert(dread->GetStIdx().Idx()); - } - var_to_stmt[dread->GetStIdx().Idx()].push_back(&stmt); - } else if (IsCommutative(addr_expr->GetOpCode())) { - std::vector dump_reg; - recursion(addr_expr->Opnd(0), dump_reg); - for (int32 reg_tmp : dump_reg) { - if (reg_to_stmt.count(reg_tmp) == 0) { - reg_order.insert(reg_tmp); - } - reg_to_stmt[reg_tmp].push_back(&stmt); - } - } - } else if (stmt.GetOpCode() == OP_brtrue || stmt.GetOpCode() == OP_brfalse) { - set_check br_tmp; - 
dep_expansion(stmt.Opnd(0), br_tmp, reg_to_stmt, var_to_stmt, mefunc); - gen_register_dep(&stmt, br_tmp, reg_to_stmt, var_to_stmt, mefunc); - - CondGotoNode *cgotoNode = static_cast(&stmt); - StmtNode *nextStmt = stmt.GetRealNext(); - instrument_flag = false; - // if it is a user check - if (userchecks.count(&stmt) > 0) { - instrument_flag = true; - user_set_check.push_back(br_tmp); - user_set_check_ID.push_back(stmt.GetStmtID()); - } else if (nextStmt != nullptr) { - if (CallNode *testcallNode = dynamic_cast(nextStmt)) { - MIRFunction *testcalleeFunc = - GlobalTables::GetFunctionTable().GetFunctionFromPuidx(testcallNode->GetPUIdx()); - // instrument if it is a call to sanitzer - if (testcalleeFunc->GetName().find("__asan_report_") == 0) { - san_set_check.push_back(br_tmp); - san_set_check_ID.push_back(stmt.GetStmtID()); - instrument_flag = true; - } - } - } - if (instrument_flag) { - uint32 goto_id = cgotoNode->GetOffset(); - brgoto_map[goto_id].push_back(&stmt); - uint32 lb_id = (static_cast(&stmt))->GetLabelIdx(); - // save the BB id for checking - stmt_to_bbID[lb_id] = bb->UintID(); - stmt_id_to_stmt[stmt.GetStmtID()] = &stmt; - stmt_id_list.push_back(stmt.GetStmtID()); - } - } - } - } - } - if (brgoto_map.size() > 0) { - // We loop again, if - for (BB *bb : cfg->GetAllBBs()) { - if (bb) { - for (StmtNode &stmt : bb->GetStmtNodes()) { - uint32 label_index = (static_cast(&stmt))->GetLabelIdx(); - auto iter = brgoto_map.find(label_index); - // Some instruction with goto, will have the same LB id, as result - // we may double count our coverage, so, we exclude op_goto - if (iter != brgoto_map.end() && stmt.GetOpCode() != OP_goto) { - std::vector tmp = brgoto_map[label_index]; - for (auto stmt_tmp : tmp) { - uint32 tmp_label_index = (static_cast(stmt_tmp))->GetLabelIdx(); - auto id_check = stmt_to_bbID.find(tmp_label_index); - if (id_check == stmt_to_bbID.end()) { - bb_id = 0; - } else { - bb_id = stmt_to_bbID[tmp_label_index]; - } - stmt_id = 
stmt_tmp->GetStmtID(); - // We reverse the logic here - // Since brtrue, means jump if the check equal to true - // The instruction itself will need to be false in order for being executed - if (stmt_tmp->GetOpCode() == OP_brtrue) { - br_true = 0; - } else { - br_true = 1; - } - // record whether it is a usercheck or sancheck - auto search = userchecks.find(stmt_tmp); - if (search != userchecks.end()) { - type_of_check = 0; - } else { - type_of_check = 1; - } - CallNode *caller_cov = retCallCOV(mefunc, bb_id, stmt_id, br_true, type_of_check); - CallNode *callee_cov = retCallCOV(mefunc, bb_id, stmt_id, br_true ^ 1, type_of_check); - caller_cov->InsertBeforeThis(*stmt_tmp); - callee_cov->InsertBeforeThis(stmt); - stmt_to_cleanup.emplace_back(caller_cov, bb); - stmt_to_cleanup.emplace_back(callee_cov, bb); - } - } - } - } - } - } - - if (check_env == 2) { - LogInfo::MapleLogger() << "Solving Sat" - << "\n"; - // If is eliminate mode - std::string fn_UC = mefunc.GetMIRModule().GetFileName() + "_UC"; - std::string fn_SC = mefunc.GetMIRModule().GetFileName() + "_SC"; - std::map san_struct_UC = gen_dynmatch(fn_UC); - std::map san_struct_SC = gen_dynmatch(fn_SC); - std::map> SC_SC_mapping; - std::map> UC_SC_mapping; - - for (auto const &[id_UC, val_UC] : san_struct_UC) { - for (auto const &[id_SC, val_SC] : san_struct_SC) { - // For SC-UC case, SC must be var a - if (dynamic_sat(val_SC, val_UC, false)) { - if (UC_SC_mapping.count(id_SC)) { - UC_SC_mapping[id_SC].insert(id_UC); - } else { - std::set tmp_set; - tmp_set.insert(id_UC); - UC_SC_mapping[id_SC] = tmp_set; - } - if (UC_SC_mapping.count(id_UC)) { - UC_SC_mapping[id_UC].insert(id_SC); - } else { - std::set tmp_set; - tmp_set.insert(id_SC); - UC_SC_mapping[id_UC] = tmp_set; - } - } - } - } - - for (auto const &[id_SC_1, val_SC_1] : san_struct_SC) { - for (auto const &[id_SC_2, val_SC_2] : san_struct_SC) { - if (id_SC_1 != id_SC_2) { - if (dynamic_sat(val_SC_1, val_SC_2, false)) { - if (SC_SC_mapping.count(id_SC_1)) { 
- SC_SC_mapping[id_SC_1].insert(id_SC_2); - } else { - std::set tmp_set; - tmp_set.insert(id_SC_2); - SC_SC_mapping[id_SC_1] = tmp_set; - } - if (SC_SC_mapping.count(id_SC_2)) { - SC_SC_mapping[id_SC_2].insert(id_SC_1); - } else { - std::set tmp_set; - tmp_set.insert(id_SC_1); - SC_SC_mapping[id_SC_2] = tmp_set; - } - } - } - } - } - // san deletion - int SCSC_SAT_CNT = 0; - int SCSC_SAT_RUNS = 0; - for (size_t san_i = 0; san_i < san_set_check.size(); san_i++) { - for (size_t san_j = san_i + 1; san_j < san_set_check.size(); san_j++) { - SCSC_SAT_RUNS += 1; - uint32 san_i_stmt_ID = san_set_check_ID[san_i]; - uint32 san_j_stmt_ID = san_set_check_ID[san_j]; - if (SC_SC_mapping.count(san_i_stmt_ID)) { - if (SC_SC_mapping[san_i_stmt_ID].count(san_j_stmt_ID)) { - if (sat_check(san_set_check[san_i], san_set_check[san_j])) { - SCSC_SAT_CNT += 1; - StmtNode *erase_stmt; - // we just assume the larger the stmtID - // the later the stmt appears, which mostly work - if (san_i_stmt_ID > san_j_stmt_ID) { - erase_stmt = stmt_id_to_stmt[san_i_stmt_ID]; - } else { - erase_stmt = stmt_id_to_stmt[san_j_stmt_ID]; - } - if (std::count(stmt_to_remove.begin(), stmt_to_remove.end(), erase_stmt) == 0) { - stmt_to_remove.push_back(erase_stmt); - call_stmt_to_remove.push_back(erase_stmt->GetRealNext()->GetRealNext()); - } - } - } - } - } - } - int UCSC_SAT_CNT = 0; - int UCSC_SAT_RUNS = 0; - for (size_t san_i = 0; san_i < san_set_check.size(); san_i++) { - for (size_t user_j = 0; user_j < user_set_check.size(); user_j++) { - UCSC_SAT_RUNS += 1; - uint32 san_i_stmt_ID = san_set_check_ID[san_i]; - uint32 user_j_stmt_ID = user_set_check_ID[user_j]; - if (UC_SC_mapping.count(san_i_stmt_ID)) { - if (UC_SC_mapping[san_i_stmt_ID].count(user_j_stmt_ID)) { - print_dep(user_set_check[user_j]); - print_dep(san_set_check[san_i]); - bool goflag = false; - if (sat_check(user_set_check[user_j], san_set_check[san_i])) { - goflag = true; - } else { - san_set_check[san_i].opcode.erase( - 
std::remove_if(san_set_check[san_i].opcode.begin(), san_set_check[san_i].opcode.end(), isBlacklist), - san_set_check[san_i].opcode.end()); - if (sat_check(user_set_check[user_j], san_set_check[san_i])) { - goflag = true; - } - } - if (goflag) { - UCSC_SAT_CNT += 1; - StmtNode *erase_stmt = stmt_id_to_stmt[san_i_stmt_ID]; - if (std::count(stmt_to_remove.begin(), stmt_to_remove.end(), erase_stmt) == 0) { - stmt_to_remove.push_back(erase_stmt); - call_stmt_to_remove.push_back(erase_stmt->GetRealNext()->GetRealNext()); - } - } - } - } - } - } - LogInfo::MapleLogger() << "UC size: " << user_set_check.size() << "\n "; - LogInfo::MapleLogger() << "SC size: " << san_set_check.size() << "\n "; - - LogInfo::MapleLogger() << "Total UC-SC pairs: " << UCSC_SAT_RUNS << " Eliminate: " << UCSC_SAT_CNT << "\n "; - LogInfo::MapleLogger() << "Total SC-SC pairs: " << SCSC_SAT_RUNS << " Eliminate: " << SCSC_SAT_CNT << "\n "; - - LogInfo::MapleLogger() << "Removing phase: \n"; - for (BB *bb : cfg->GetAllBBs()) { - if (bb) { - for (StmtNode &stmt : bb->GetStmtNodes()) { - if (std::count(stmt_to_remove.begin(), stmt_to_remove.end(), &stmt)) { - if (CallNode *testcallNode = dynamic_cast(&stmt)) { - stmt_to_cleanup.emplace_back(&stmt, bb); - } else { - set_check br_tmp; - dep_expansion(stmt.Opnd(0), br_tmp, reg_to_stmt, var_to_stmt, mefunc); - std::set tmp_var_set; - while (!br_tmp.var_live.empty()) { - size_t var_to_check = br_tmp.var_live.top(); - tmp_var_set.insert(var_to_check); - br_tmp.var_live.pop(); - } - bool term_flag = false; - StmtNode *prevStmt = stmt.GetPrev(); - while (!term_flag && prevStmt != nullptr) { - if (prevStmt->GetOpCode() == OP_brtrue || prevStmt->GetOpCode() == OP_brfalse) { - set_check br_local_tmp; - bool trigger = false; - dep_expansion(prevStmt->Opnd(0), br_local_tmp, reg_to_stmt, var_to_stmt, mefunc); - while (!br_local_tmp.var_live.empty()) { - uint32 var_to_check = br_local_tmp.var_live.top(); - if (mefunc.GetMIRModule().CurFunction()->GetSymbolTabSize() >= 
var_to_check) { - MIRSymbol *var = mefunc.GetMIRModule().CurFunction()->GetSymbolTabItem(var_to_check); - if (var->GetName().find("asan_addr") == 0) { - trigger = true; - tmp_var_set.insert(var_to_check); - } - } - br_local_tmp.var_live.pop(); - } - // we hit a possible UC, we terminate here - if (!trigger) { - term_flag = true; - } else { - prevStmt = prevStmt->GetPrev(); - // bb->RemoveStmtNode(prevStmt->GetRealNext()); - stmt_to_cleanup.emplace_back(prevStmt->GetRealNext(), bb); - } - } else if (prevStmt->GetOpCode() == OP_dassign) { - DassignNode *dassign = static_cast(prevStmt); - // dump extra dependence - set_check br_local_tmp; - dep_expansion(prevStmt, br_local_tmp, reg_to_stmt, var_to_stmt, mefunc); - while (!br_local_tmp.var_live.empty()) { - uint32 var_to_check = br_local_tmp.var_live.top(); - if (mefunc.GetMIRModule().CurFunction()->GetSymbolTabSize() >= var_to_check) { - MIRSymbol *var = mefunc.GetMIRModule().CurFunction()->GetSymbolTabItem(var_to_check); - if (var->GetName().find("asan_addr") == 0) { - tmp_var_set.insert(var_to_check); - } - } - br_local_tmp.var_live.pop(); - } - if (tmp_var_set.count(dassign->GetStIdx().Idx())) { - prevStmt = prevStmt->GetPrev(); - stmt_to_cleanup.emplace_back(prevStmt->GetRealNext(), bb); - } else { - prevStmt = prevStmt->GetPrev(); - } - } else if (prevStmt->GetOpCode() == OP_dassignoff) { - /* - The dassignoff is not documented in the MAPLE IR - It simulate the iassignoff implementation - dassignoff () - */ - DassignoffNode *dassignoff = dynamic_cast(prevStmt); - CHECK_FATAL(dassignoff != nullptr, "Node with OP_dassignoff but not DassignoffNode"); - // TODO: I am not sure what should be done for it now ... 
- } else if (CallNode *tmpTestCallNode = dynamic_cast(prevStmt)) { - // stop if we hit a Call - term_flag = true; - } else { - prevStmt = prevStmt->GetPrev(); - } - } - stmt_to_cleanup.emplace_back(&stmt, bb); - } - } - } - } - } - for (auto bb_pair : stmt_to_cleanup) { - bb_pair.second->RemoveStmtNode(bb_pair.first); - } - int erase_ctr = 0; - LogInfo::MapleLogger() << "Clean up redundant call stmt " - << "\n"; - BlockNode *bodyNode = mefunc.GetMirFunc()->GetBody(); - for (auto stmt : call_stmt_to_remove) { - erase_ctr += 1; - bodyNode->RemoveStmt(stmt); - } - LogInfo::MapleLogger() << "Erased: " << erase_ctr << "\n"; - } - if ((mefunc.GetName().compare("main") == 0) && (check_env == 1)) { - // Register the call, such it dump the coverage at the exit - __san_cov_flush = getOrInsertFunction(builder, "__san_cov_flush", voidType, {}); - // Insert the atexit to the starting point of the main - MapleVector args(mefunc.GetMIRModule().GetMPAllocator().Adapter()); - StmtNode *stmt_tmp = builder->CreateStmtCall(__san_cov_flush->GetPuidx(), args); - mefunc.GetMirFunc()->GetBody()->InsertFirst(stmt_tmp); - } LogInfo::MapleLogger() << "****************SANRAZOR Done****************" << "\n"; } -void AddressSanitizer::instrumentMemIntrinsic(IntrinsiccallNode *stmtNode) { +void AddressSanitizer::instrumentMemIntrinsic(IntrinsiccallNode *stmtNode, BlockNode* block) { if (stmtNode == nullptr) { return; } @@ -570,7 +129,7 @@ void AddressSanitizer::instrumentMemIntrinsic(IntrinsiccallNode *stmtNode) { args.emplace_back(stmtNode->Opnd(2)); CallNode *registerCallNode = module->GetMIRBuilder()->CreateStmtCall(AsanMemset->GetPuidx(), args); - func->GetMirFunc()->GetBody()->ReplaceStmt1WithStmt2(stmtNode, registerCallNode); + block->ReplaceStmt1WithStmt2(stmtNode, registerCallNode); return; } case INTRN_C_memmove: { @@ -580,7 +139,7 @@ void AddressSanitizer::instrumentMemIntrinsic(IntrinsiccallNode *stmtNode) { args.emplace_back(stmtNode->Opnd(2)); CallNode *registerCallNode = 
module->GetMIRBuilder()->CreateStmtCall(AsanMemmove->GetPuidx(), args); - func->GetMirFunc()->GetBody()->ReplaceStmt1WithStmt2(stmtNode, registerCallNode); + block->ReplaceStmt1WithStmt2(stmtNode, registerCallNode); return; } case INTRN_C_memcpy: { @@ -590,7 +149,7 @@ void AddressSanitizer::instrumentMemIntrinsic(IntrinsiccallNode *stmtNode) { args.emplace_back(stmtNode->Opnd(2)); CallNode *registerCallNode = module->GetMIRBuilder()->CreateStmtCall(AsanMemcpy->GetPuidx(), args); - func->GetMirFunc()->GetBody()->ReplaceStmt1WithStmt2(stmtNode, registerCallNode); + block->ReplaceStmt1WithStmt2(stmtNode, registerCallNode); return; } default: { @@ -606,11 +165,18 @@ MemoryAccess AddressSanitizer::getIassignMemoryAccess(IassignNode &iassign) { size_t align = pointedTy->GetAlign(); if (pointedTy->IsStructType()) { MIRStructType *mirStructType = dynamic_cast(pointedTy); - if (iassign.GetFieldID() > 0) { - pointedTy = mirStructType->GetFieldType(iassign.GetFieldID()); - align = pointedTy->GetAlign(); + FieldID fieldID = iassign.GetFieldID(); + if (fieldID > 0) { + pointedTy = mirStructType->GetFieldType(fieldID); + if (fieldID == 1 && mirStructType->GetFieldOffsetFromBaseAddr(fieldID).byteOffset == 0) { + // I do not know why the first field can be inequal to the whole structure's align sometimes + align = mirStructType->GetAlign(); + } else { + align = pointedTy->GetAlign(); + } } else { - align = pointedTy->GetSize(); + // align = pointedTy->GetSize(); + align = mirStructType->GetAlign(); } } BaseNode *addr = @@ -647,11 +213,18 @@ MemoryAccess AddressSanitizer::getIreadMemoryAccess(IreadNode &iread, StmtNode * size_t align = pointedTy->GetAlign(); if (pointedTy->IsStructType()) { MIRStructType *mirStructType = dynamic_cast(pointedTy); - if (iread.GetFieldID() > 0) { - pointedTy = mirStructType->GetFieldType(iread.GetFieldID()); - align = pointedTy->GetAlign(); + FieldID fieldID = iread.GetFieldID(); + if (fieldID > 0) { + pointedTy = mirStructType->GetFieldType(fieldID); 
+ if (fieldID == 1 && mirStructType->GetFieldOffsetFromBaseAddr(fieldID).byteOffset == 0) { + // I do not know why the first field can be inequal to the whole structure's align sometimes + align = mirStructType->GetAlign(); + } else { + align = pointedTy->GetAlign(); + } } else { - align = pointedTy->GetSize(); + // align = pointedTy->GetSize(); + align = mirStructType->GetAlign(); } } BaseNode *addr = @@ -677,7 +250,14 @@ std::vector AddressSanitizer::isInterestingMemoryAccess(StmtNode * IassignNode *iassign = dynamic_cast(baseNode); CHECK_FATAL((iassign != nullptr), "Invalid IR node with OpCode OP_iassign"); struct MemoryAccess memoryAccess = getIassignMemoryAccess(*iassign); - memAccess.emplace_back(memoryAccess); + /* MEDoNeighborOpt */ + int64 newSize = checkNeighborOpt(iassign); + LogInfo::MapleLogger() << "checkNeighborOpt: "<< newSize <<"\n"; + if (newSize > 0) + memoryAccess.typeSize = newSize << 3; + if (newSize >= 0) + memAccess.emplace_back(memoryAccess); + // the rhs-expr can still read from somewhere, push it to stack baseNodeStack.push(iassign->Opnd(1)); break; @@ -686,7 +266,14 @@ std::vector AddressSanitizer::isInterestingMemoryAccess(StmtNode * IassignoffNode *iassignoff = dynamic_cast(baseNode); CHECK_FATAL((iassignoff != nullptr), "Invalid IR node with OpCode OP_iassignoff"); struct MemoryAccess memoryAccess = getIassignoffMemoryAccess(*iassignoff); - memAccess.emplace_back(memoryAccess); + /* MEDoNeighborOpt */ + int64 newSize = checkNeighborOpt(iassignoff); + LogInfo::MapleLogger() << "checkNeighborOpt: " << newSize << "\n"; + if (newSize > 0) + memoryAccess.typeSize = newSize << 3; + if (newSize >= 0) + memAccess.emplace_back(memoryAccess); + // the rhs-expr can still read from somewhere, push it to stack baseNodeStack.push(iassignoff->GetBOpnd(1)); break; @@ -703,7 +290,13 @@ std::vector AddressSanitizer::isInterestingMemoryAccess(StmtNode * } CHECK_FATAL((iread != nullptr), "Invalid IR node with OpCode OP_iread."); struct MemoryAccess 
memoryAccess = getIreadMemoryAccess(*iread, stmtNode); - memAccess.emplace_back(memoryAccess); + /* MEDoNeighborOpt */ + int64 newSize = checkNeighborOpt(iread); + LogInfo::MapleLogger() << "checkNeighborOpt: " << newSize << "\n"; + if (newSize > 0) + memoryAccess.typeSize = newSize << 3; + if (newSize >= 0) + memAccess.emplace_back(memoryAccess); break; } case OP_ireadoff: @@ -764,12 +357,16 @@ void AddressSanitizer::maybeInsertDynamicShadowAtFunctionEntry(const MeFunction LocalDynamicShadow = mirBuilder->CreateExprIread(*IntPtrTy, *Int64PtrTy, 0, dreadNode); } -void AddressSanitizer::instrumentMop(StmtNode *I, std::vector &memoryAccess) { +void AddressSanitizer::instrumentMop(StmtNode *I, std::vector &memoryAccess, BlockNode* block) { assert(memoryAccess.size() > 0); size_t granularity = 1 << Mapping.Scale; for (MemoryAccess access : memoryAccess) { - doInstrumentAddress(this, I, access.ptrOperand, access.alignment, granularity, access.typeSize, access.isWrite); + uint64 newMemSize = access.typeSize; + // LogInfo::MapleLogger() << "doInstrumentAddress:"<push_back(asanBlock); } } @@ -792,9 +389,22 @@ BaseNode *AddressSanitizer::memToShadow(BaseNode *Shadow, MIRBuilder &mirBuilder } } -void AddressSanitizer::instrumentAddress(StmtNode *InsertBefore, BaseNode *Addr, uint64_t TypeSize, +AsanVirtualBlock AddressSanitizer::doInstrumentAddress(StmtNode *InsertBefore, BaseNode *Addr, BlockNode* block, + size_t Alignment, size_t Granularity, uint64_t TypeSize, bool IsWrite) { + // Instrument a 1-, 2-, 4-, 8-, or 16- byte access with one check + // if the data is properly aligned. 
+ if ((TypeSize == 8 || TypeSize == 16 || TypeSize == 32 || TypeSize == 64 || TypeSize == 128) && + (Alignment >= Granularity || Alignment == 0 || Alignment >= TypeSize / 8)) { + return instrumentAddress(InsertBefore, Addr, block, TypeSize, IsWrite, nullptr); + } else { + return instrumentUnusualSizeOrAlignment(InsertBefore, Addr, block, TypeSize, IsWrite); + } +} + +AsanVirtualBlock AddressSanitizer::instrumentAddress(StmtNode *InsertBefore, BaseNode *Addr, BlockNode* block, uint64_t TypeSize, bool IsWrite, BaseNode *SizeArgument) { MIRBuilder *mirBuilder = module->GetMIRBuilder(); + AsanVirtualBlock asanBlock; #ifdef ENABLERBTREE auto i32PrimTy = GlobalTables::GetTypeTable().GetInt32()->GetPrimType(); @@ -810,8 +420,9 @@ void AddressSanitizer::instrumentAddress(StmtNode *InsertBefore, BaseNode *Addr, MIRSymbol *addrSymbol = getOrCreateSymbol(mirBuilder, IntPtrTy->GetTypeIndex(), "asan_addr", kStVar, kScAuto, module->CurFunction(), kScopeLocal); DassignNode *dassignNode = mirBuilder->CreateStmtDassign(addrSymbol->GetStIdx(), 0, Addr); - - func->GetMirFunc()->GetBody()->InsertBefore(InsertBefore, dassignNode); + block->InsertBefore(InsertBefore, dassignNode); + // save the first pointer + asanBlock.first = dassignNode; // Assign the address to %addr MIRType *shadowTy = GlobalTables::GetTypeTable().GetInt8(); @@ -825,7 +436,8 @@ void AddressSanitizer::instrumentAddress(StmtNode *InsertBefore, BaseNode *Addr, module->CurFunction(), kScopeLocal); dassignNode = mirBuilder->CreateStmtDassign(shadowValue->GetStIdx(), 0, mirBuilder->CreateExprIread(*shadowTy, *shadowPtrTy, 0, shadowPtr)); - dassignNode->InsertAfterThis(*InsertBefore); + block->InsertBefore(InsertBefore, dassignNode); + // Check if value != 0 BinaryNode *cmp = mirBuilder->CreateExprBinary( OP_ne, *shadowTy, mirBuilder->CreateDread(*shadowValue, shadowTy->GetPrimType()), cmpVal); @@ -844,25 +456,38 @@ void AddressSanitizer::instrumentAddress(StmtNode *InsertBefore, BaseNode *Addr, } CallNode *crash = 
generateCrashCode(addrSymbol, IsWrite, accessSizeIndex, SizeArgument); crash->InsertBeforeThis(*crashBlock); + asanBlock.last = InsertBefore->GetPrev(); + asanBlock.crashes.push_back(crash); + asanBlock.normal = InsertBefore; + return asanBlock; } -void AddressSanitizer::instrumentUnusualSizeOrAlignment(StmtNode *InsertBefore, BaseNode *Addr, +AsanVirtualBlock AddressSanitizer::instrumentUnusualSizeOrAlignment(StmtNode *InsertBefore, BaseNode *Addr, BlockNode* block, uint64_t TypeSize, bool IsWrite) { MIRBuilder *mirBuilder = module->GetMIRBuilder(); + AsanVirtualBlock asanBlock; + BaseNode *size = mirBuilder->CreateIntConst(TypeSize / 8, IntPtrPrim); MIRSymbol *addrSymbol = getOrCreateSymbol(mirBuilder, IntPtrTy->GetTypeIndex(), "asan_addr", kStVar, kScAuto, module->CurFunction(), kScopeLocal); - DassignNode *dassignNode = mirBuilder->CreateStmtDassign(addrSymbol->GetStIdx(), 0, Addr); - dassignNode->InsertAfterThis(*InsertBefore); + // DassignNode *dassignNode = mirBuilder->CreateStmtDassign(addrSymbol->GetStIdx(), 0, Addr); + // block->InsertBefore(InsertBefore, dassignNode); + AsanVirtualBlock check1 = instrumentAddress(InsertBefore, Addr, block, 8, IsWrite, size); + asanBlock.first = check1.first; + BinaryNode *binaryNode = mirBuilder->CreateExprBinary(OP_add, *IntPtrTy, mirBuilder->CreateDread(*addrSymbol, IntPtrPrim), mirBuilder->CreateIntConst(TypeSize / 8 - 1, IntPtrPrim)); MIRSymbol *lastByteSymbol = getOrCreateSymbol(mirBuilder, IntPtrTy->GetTypeIndex(), "asan_lastByte", kStVar, kScAuto, module->CurFunction(), kScopeLocal); DassignNode *lastByte = mirBuilder->CreateStmtDassign(lastByteSymbol->GetStIdx(), 0, binaryNode); - lastByte->InsertAfterThis(*InsertBefore); - instrumentAddress(InsertBefore, Addr, 8, IsWrite, size); - instrumentAddress(InsertBefore, mirBuilder->CreateDread(*lastByteSymbol, PTY_ptr), 8, IsWrite, size); + block->InsertBefore(InsertBefore, lastByte); + AsanVirtualBlock check2 = instrumentAddress(InsertBefore, 
mirBuilder->CreateDread(*lastByteSymbol, PTY_ptr), block, 8, IsWrite, size); + asanBlock.last = InsertBefore->GetPrev(); + asanBlock.crashes.insert(asanBlock.crashes.end(), check1.crashes.begin(), check1.crashes.end()); + asanBlock.crashes.insert(asanBlock.crashes.end(), check2.crashes.begin(), check2.crashes.end()); + // TODO: to further improve, we may treat this as two AsanVirtualBlocks + return asanBlock; } BinaryNode *AddressSanitizer::createSlowPathCmp(StmtNode *InsBefore, BaseNode *AddrLong, BaseNode *ShadowValue, @@ -914,8 +539,7 @@ StmtNode *AddressSanitizer::splitIfAndElseBlock(Opcode op, StmtNode *elsePart, c brStmt->InsertAfterThis(*elsePart); brStmt->SetOffset(labelIdx); - LabelNode *labelStmt = module->CurFuncCodeMemPool()->New(); - labelStmt->SetLabelIdx(labelIdx); + LabelNode *labelStmt = mirBuilder->CreateStmtLabel(labelIdx); labelStmt->InsertAfterThis(*elsePart); return brStmt; } @@ -974,6 +598,88 @@ bool AddressSanitizer::isInterestingAlloca(const UnaryNode &unaryNode) { return isInteresting; } +void AddressSanitizer::prepareBlockNodeInfo(BlockNode* block, std::vector curDirections) { + for (StmtNode &stmt : block->GetStmtNodes()) { + if (stmt.GetOpCode() == OP_block) { + BlockNode* tmpBlock = dynamic_cast(&stmt); + CHECK_NULL_FATAL(tmpBlock); + prepareBlockNodeInfo(tmpBlock, curDirections); + } + else { + prepareInfoInternal(&stmt, &stmt, curDirections); + } + } +} + +void AddressSanitizer::prepareInfo(MeFunction &mefunc){ + std::vector directions; + prepareBlockNodeInfo(mefunc.GetMirFunc()->GetBody(), directions); + // for (StmtNode &stmt : mefunc.GetMirFunc()->GetBody()->GetStmtNodes()) { + // std::vector directions; + // prepareInfoInternal(&stmt, &stmt, directions); + // } +} + +void AddressSanitizer::prepareInfoInternal(StmtNode *stmt, BaseNode *baseNode, std::vector curDirections){ + if (baseNode != nullptr) { + this->node2stmt[baseNode] = stmt; + this->node2directions[baseNode] = curDirections; + for (size_t j = 0; j < 
baseNode->NumOpnds(); ++j) { + if (baseNode->GetOpCode() == OP_return) continue; + std::vector newDirections; + std::copy(curDirections.begin(), curDirections.end(), back_inserter(newDirections)); + newDirections.push_back(j); + prepareInfoInternal(stmt, baseNode->Opnd(j), newDirections); + } + } +} + +void AddressSanitizer::DumpSkipPos(){ + std::vector::iterator itr; + for (itr = this->skipPos.begin(); itr != this->skipPos.end(); ++itr) { + SrcPosition stmtPos = itr->stmtPos; + std::vector directions = itr->directions; + Opcode op = itr->op; + int64_t newSize = itr->newSize; + + LogInfo::MapleLogger() << "stmtPos:\n"; + stmtPos.Dump(); + LogInfo::MapleLogger() << "directions:\n"; + for (auto dir : directions){ + LogInfo::MapleLogger() << dir << " "; + } + LogInfo::MapleLogger() << "\n"; + LogInfo::MapleLogger() << "op: " << int(op) << "\n"; + LogInfo::MapleLogger() << "newSize: " << newSize << "\n"; + } +} + +int64 AddressSanitizer::checkNeighborOpt(BaseNode *node){ + if (!neighborOptSwitch) return 0; + StmtNode * stmt = this->node2stmt[node]; + if (stmt == nullptr) + return 0; + SrcPosition srcPos = stmt->GetSrcPos(); + std::vector directions = this->node2directions[node]; + Opcode op = node->GetOpCode(); + for (auto skip : this->skipPos){ + if (skip.stmtPos.IsEq(srcPos)){ + if (directions == skip.directions){ + if (op == skip.op){ + srcPos.Dump(); + LogInfo::MapleLogger() << "directions:\n"; + for (auto it : directions){ + LogInfo::MapleLogger() << it << " "; + } + LogInfo::MapleLogger() << "\nnewSize: " << skip.newSize <<"\n"; + return skip.newSize; + } + } + } + } + return 0; +} + } // namespace maple #endif \ No newline at end of file diff --git a/src/mapleall/maple_san/src/asan_module.cpp b/src/mapleall/maple_san/src/asan_module.cpp index ce1d2d01b8d54ef1198e465e12c6f27a97c477cb..dfd278e14f1a83728071347f6a433d0ab1a35633 100644 --- a/src/mapleall/maple_san/src/asan_module.cpp +++ b/src/mapleall/maple_san/src/asan_module.cpp @@ -3,395 +3,529 @@ // #ifdef 
ENABLE_MAPLE_SAN +#include "asan_config.h" #include "asan_module.h" -#include "mir_builder.h" + #include "asan_interfaces.h" +#include "mir_builder.h" +#include namespace maple { - void ModuleAddressSanitizer::initializeCallbacks() { - MIRBuilder *mirBuilder = module->GetMIRBuilder(); - - ArgVector args(module->GetMPAllocator().Adapter()); - MIRFunction *init_func = mirBuilder->CreateFunction("__cxx_global_var_init", - *GlobalTables::GetTypeTable().GetVoid(), - args, false, true); - MIRFunction *fini_func = mirBuilder->CreateFunction("__cxx_global_var_fini", - *GlobalTables::GetTypeTable().GetVoid(), - args, false, true); - init_func->SetAttr(FUNCATTR_local); - fini_func->SetAttr(FUNCATTR_local); - - module->AddFunction(init_func); - module->AddFunction(fini_func); - MIRType *retType = GlobalTables::GetTypeTable().GetVoid(); - - // Declare functions that register/unregister globals. - AsanRegisterGlobals = getOrInsertFunction( - mirBuilder, kAsanRegisterGlobalsName, retType, {IntPtrTy, IntPtrTy}); - AsanUnregisterGlobals = getOrInsertFunction( - mirBuilder, kAsanUnregisterGlobalsName, retType, {IntPtrTy, IntPtrTy}); +std::string ModuleAddressSanitizer::GetSrcFilePath() const { + char* filename = std::getenv(ASAN_MODULE_NAME_MACRO); + if (filename == nullptr) { + return module->GetFileNameWithPath(); + } else { + std::filesystem::path _name = filename; + if (_name.is_absolute()) { + CHECK_FATAL(std::filesystem::exists(_name), "File not found: %s", _name.string().c_str()); + return _name.string(); + } else { + std::filesystem::path ret = std::filesystem::current_path() / _name; + CHECK_FATAL(std::filesystem::exists(ret), "File not found: %s", ret.string().c_str()); + return ret.string(); + } } +} - bool ModuleAddressSanitizer::instrumentModule() { - initializeCallbacks(); - MapleVector args(module->GetMIRBuilder()->GetCurrentFuncCodeMpAllocator()->Adapter()); - BlockNode *ctorToBeInserted = CreateCtorAndInitFunctions(kAsanModuleCtorName, kAsanInitName, args); 
+std::string ModuleAddressSanitizer::GetModuleSymbolPostfix() const { + std::string ret = GetSrcFilePath(); + std::replace(ret.begin(), ret.end(), '/', '_'); + std::replace(ret.begin(), ret.end(), '.', '_'); + std::replace(ret.begin(), ret.end(), '-', '_'); + std::replace(ret.begin(), ret.end(), '@', '_'); + return ret; +} - InstrumentGlobals(ctorToBeInserted); +MIRSymbol* ModuleAddressSanitizer::GetSymbolFromName(std::string name) { + GStrIdx symIdx = GlobalTables::GetStrTable().GetOrCreateStrIdxFromName(name); + return GlobalTables::GetGlobalTables().GetGsymTable().GetSymbolFromStrIdx(symIdx); +} - appendToGlobalCtors(*module, AsanCtorFunction); - if (AsanDtorFunction) { - appendToGlobalDtors(*module, AsanDtorFunction); - } - module->SetSomeSymbolNeedForDecl(false); - return true; +void ModuleAddressSanitizer::AddGlobalChar(std::string name, char value, bool isInit) { + MIRType *type = GlobalTables::GetTypeTable().GetPrimType(PTY_i8); + MIRSymbol *symbol = nullptr; + if (isInit) { + symbol = module->GetMIRBuilder()->CreateSymbol(type->GetTypeIndex(), name, kStVar, kScGlobal, nullptr, kScopeGlobal); + } else { + symbol = GetSymbolFromName(name); + } + MIRConst *constValue = GlobalTables::GetIntConstTable().GetOrCreateIntConst(value, *type); + symbol->SetKonst(constValue); + if (isInit) { + module->AddSymbol(symbol); } +} - bool ModuleAddressSanitizer::InstrumentGlobals(BlockNode *ctorToBeInserted) { - std::vector globalsToChange; - for (MIRSymbol *global : GetGlobalVaribles(*module)) { - if (ShouldInstrumentGlobal(global)) { - globalsToChange.push_back(global); - } +void ModuleAddressSanitizer::AddGlobalInt(std::string name, int64_t value, bool isInit) { + MIRType *type = GlobalTables::GetTypeTable().GetPrimType(PTY_i64); + MIRSymbol *symbol = nullptr; + if (isInit) { + symbol = module->GetMIRBuilder()->CreateSymbol(type->GetTypeIndex(), name, kStVar, kScGlobal, nullptr, kScopeGlobal); + } else { + symbol = GetSymbolFromName(name); + } + MIRConst *constValue = 
GlobalTables::GetIntConstTable().GetOrCreateIntConst(value, *type); + symbol->SetKonst(constValue); + if (isInit) { + module->AddSymbol(symbol); + } +} + +void ModuleAddressSanitizer::AddGlobalUInt(std::string name, uint64_t value, bool isInit) { + MIRType *type = GlobalTables::GetTypeTable().GetPrimType(PTY_u64); + MIRSymbol *symbol = nullptr; + if (isInit) { + symbol = module->GetMIRBuilder()->CreateSymbol(type->GetTypeIndex(), name, kStVar, kScGlobal, nullptr, kScopeGlobal); + } else { + symbol = GetSymbolFromName(name); + } + MIRConst *constValue = GlobalTables::GetIntConstTable().GetOrCreateIntConst(value, *type); + symbol->SetKonst(constValue); + if (isInit) { + module->AddSymbol(symbol); + } +} + +void ModuleAddressSanitizer::AddGlobalString(std::string name, std::string value, bool isInit) { + MIRType *type = + GlobalTables::GetTypeTable().GetOrCreatePointerType(*GlobalTables::GetTypeTable().GetPrimType(PTY_i8)); + MIRSymbol *symbol = nullptr; + if (isInit) { + symbol = module->GetMIRBuilder()->CreateSymbol(type->GetTypeIndex(), name, kStVar, kScGlobal, nullptr, kScopeGlobal); + } else { + symbol = GetSymbolFromName(name); + } + MIRStrConst *constValue = module->GetMemPool()->New(value, *type); + symbol->SetKonst(constValue); + if (isInit) { + module->AddSymbol(symbol); + } +} + +void ModuleAddressSanitizer::AddGlobalUIntArray(std::string name, std::vector values, bool isInit) { + MIRArrayType *arrayType = + GlobalTables::GetTypeTable().GetOrCreateArrayType(*GlobalTables::GetTypeTable().GetUInt64(), 0); + MIRSymbol * array = nullptr; + if (isInit) { + array = module->GetMIRBuilder()->CreateSymbol(arrayType->GetTypeIndex(), name, kStVar, kScGlobal, nullptr, kScopeGlobal); + } else { + array = GetSymbolFromName(name); + } + MIRAggConst *constArray = module->GetMemPool()->New(*module, *arrayType); + for (auto v : values) { + constArray->PushBack( + GlobalTables::GetIntConstTable().GetOrCreateIntConst(v, *GlobalTables::GetTypeTable().GetUInt64())); + } + 
array->SetKonst(constArray); + if (isInit) { + module->AddSymbol(array); + } +} + +void ModuleAddressSanitizer::AddSymbol(MIRSymbol* sym) { + module->AddSymbol(sym); +} + +void ModuleAddressSanitizer::initializeCallbacks() { + MIRBuilder *mirBuilder = module->GetMIRBuilder(); + + ArgVector args(module->GetMPAllocator().Adapter()); + MIRFunction *init_func = + mirBuilder->CreateFunction(kAsanInitializer, *GlobalTables::GetTypeTable().GetVoid(), args, false, true); + MIRFunction *fini_func = + mirBuilder->CreateFunction(kAsanFinisher, *GlobalTables::GetTypeTable().GetVoid(), args, false, true); + init_func->SetAttr(FUNCATTR_local); + fini_func->SetAttr(FUNCATTR_local); + + module->AddFunction(init_func); + module->AddFunction(fini_func); + MIRType *retType = GlobalTables::GetTypeTable().GetVoid(); + + // Declare functions that register/unregister globals. + AsanRegisterGlobals = getOrInsertFunction(mirBuilder, kAsanRegisterGlobalsName, retType, {IntPtrTy, IntPtrTy}); + AsanUnregisterGlobals = getOrInsertFunction(mirBuilder, kAsanUnregisterGlobalsName, retType, {IntPtrTy, IntPtrTy}); +} + +MIRFunction* ModuleAddressSanitizer::GetGlobalCtor() const { + MIRBuilder* mirBuilder = module->GetMIRBuilder(); + MIRFunction *ctor = mirBuilder->GetOrCreateFunction(kAsanInitializer, TyIdx(PTY_void)); + return ctor; +} + +MIRFunction* ModuleAddressSanitizer::GetGlobalDtor() const { + MIRBuilder* mirBuilder = module->GetMIRBuilder(); + MIRFunction *dtor = mirBuilder->GetOrCreateFunction(kAsanFinisher, TyIdx(PTY_void)); + return dtor; +} + +void ModuleAddressSanitizer::AppendStmtToGlobalInit(StmtNode *node) { + MIRFunction *GlobalCtors = GetGlobalCtor(); + GlobalCtors->GetBody()->AddStatement(node); +} + +MIRFunction *ModuleAddressSanitizer::GetOrInsertFunction(const char *name, MIRType *retType, std::vector argTypes) { + MIRBuilder *mirBuilder = module->GetMIRBuilder(); + return getOrInsertFunction(mirBuilder, name, retType, argTypes); +} + +bool 
ModuleAddressSanitizer::instrumentModule() { + initializeCallbacks(); + MapleVector args(module->GetMIRBuilder()->GetCurrentFuncCodeMpAllocator()->Adapter()); + + MIRBuilder *mirBuilder = module->GetMIRBuilder(); + MIRFunction *__asan_init = + this->GetOrInsertFunction(kAsanInitName, GlobalTables::GetTypeTable().GetVoid(), {}); + MapleVector args__asan_init(mirBuilder->GetCurrentFuncCodeMpAllocator()->Adapter()); + // change current function of the builder since the call statement will be inserted into the __asan_init + auto old_func = mirBuilder->GetCurrentFunction(); + mirBuilder->SetCurrentFunction(*__asan_init); + CallNode *call__asan_init = mirBuilder->CreateStmtCall(__asan_init->GetPuidx(), args__asan_init); + // reset !!!!! + mirBuilder->SetCurrentFunction(*old_func); + this->AppendStmtToGlobalInit(call__asan_init); + // BlockNode *ctorToBeInserted = CreateCtorAndInitFunctions(kAsanModuleCtorName, kAsanInitName, args); + + // InstrumentGlobals(ctorToBeInserted); + + // appendToGlobalCtors(*module, AsanCtorFunction); + if (AsanDtorFunction) { + appendToGlobalDtors(*module, AsanDtorFunction); + } + module->SetSomeSymbolNeedForDecl(false); + return true; +} + +bool ModuleAddressSanitizer::InstrumentGlobals(BlockNode *ctorToBeInserted) { + return false; + std::vector globalsToChange; + for (MIRSymbol *global : GetGlobalVaribles(*module)) { + if (ShouldInstrumentGlobal(global)) { + globalsToChange.push_back(global); } + } + + size_t n = globalsToChange.size(); + if (n == 0) { + return false; + } + FieldVector fieldVector; + FieldVector parentFileds; + std::vector newGlobals(n); + std::vector initializers(n); + + // We initialize an array of such structures and pass it to a run-time call. 
+ GlobalTables::GetTypeTable().PushIntoFieldVector(fieldVector, "beg", *IntPtrTy); + GlobalTables::GetTypeTable().PushIntoFieldVector(fieldVector, "size", *IntPtrTy); + GlobalTables::GetTypeTable().PushIntoFieldVector(fieldVector, "size_with_redzone", *IntPtrTy); + GlobalTables::GetTypeTable().PushIntoFieldVector(fieldVector, "name", *IntPtrTy); + GlobalTables::GetTypeTable().PushIntoFieldVector(fieldVector, "module_name", *IntPtrTy); + GlobalTables::GetTypeTable().PushIntoFieldVector(fieldVector, "has_dynamic_init", *IntPtrTy); + GlobalTables::GetTypeTable().PushIntoFieldVector(fieldVector, "source_location", *IntPtrTy); + GlobalTables::GetTypeTable().PushIntoFieldVector(fieldVector, "odr_indicator", *IntPtrTy); + // Create new type for global with redzones + MIRStructType *globalStructForInitTy = static_cast( + GlobalTables::GetTypeTable().GetOrCreateStructType("GlobalStruct", fieldVector, parentFileds, *module)); - size_t n = globalsToChange.size(); - if (n == 0) { - return false; + for (size_t i = 0; i < n; i++) { + static const uint64_t kMaxGlobalRedzone = 1 << 18; + MIRSymbol *global = globalsToChange[i]; + // Compute the size of redzone + size_t sizeInBytes = global->GetType()->GetSize(); + size_t minRedZone = MinRedzoneSizeForGlobal(); + size_t redzone = std::max(minRedZone, std::min(kMaxGlobalRedzone, ((sizeInBytes / minRedZone) / 4) * minRedZone)); + size_t rightRedzoneSize = redzone; + if (sizeInBytes % minRedZone) { + rightRedzoneSize += minRedZone - (sizeInBytes % minRedZone); } - FieldVector fieldVector; - FieldVector parentFileds; - std::vector newGlobals(n); - std::vector initializers(n); - - // We initialize an array of such structures and pass it to a run-time call. 
- GlobalTables::GetTypeTable().PushIntoFieldVector( - fieldVector, "beg", *IntPtrTy); - GlobalTables::GetTypeTable().PushIntoFieldVector( - fieldVector, "size", *IntPtrTy); - GlobalTables::GetTypeTable().PushIntoFieldVector( - fieldVector, "size_with_redzone", *IntPtrTy); - GlobalTables::GetTypeTable().PushIntoFieldVector( - fieldVector, "name", *IntPtrTy); - GlobalTables::GetTypeTable().PushIntoFieldVector( - fieldVector, "module_name", *IntPtrTy); - GlobalTables::GetTypeTable().PushIntoFieldVector( - fieldVector, "has_dynamic_init", *IntPtrTy); - GlobalTables::GetTypeTable().PushIntoFieldVector( - fieldVector, "source_location", *IntPtrTy); - GlobalTables::GetTypeTable().PushIntoFieldVector( - fieldVector, "odr_indicator", *IntPtrTy); + ASSERT(((rightRedzoneSize + sizeInBytes) % minRedZone) == 0, + "rightRedzoneSize + sizeInBytes cannot be divided by minRedZone"); + // Create new type for global with redzones - MIRStructType *globalStructForInitTy = static_cast( - GlobalTables::GetTypeTable().GetOrCreateStructType( - "GlobalStruct", fieldVector, parentFileds, *module)); - - for (size_t i = 0; i < n; i++) { - static const uint64_t kMaxGlobalRedzone = 1 << 18; - MIRSymbol *global = globalsToChange[i]; - // Compute the size of redzone - size_t sizeInBytes = global->GetType()->GetSize(); - size_t minRedZone = MinRedzoneSizeForGlobal(); - size_t redzone = std::max(minRedZone, - std::min(kMaxGlobalRedzone, ((sizeInBytes / minRedZone) / 4) * minRedZone)); - size_t rightRedzoneSize = redzone; - if (sizeInBytes % minRedZone) { - rightRedzoneSize += minRedZone - (sizeInBytes % minRedZone); - } - ASSERT(((rightRedzoneSize + sizeInBytes) % minRedZone) == 0, - "rightRedzoneSize + sizeInBytes cannot be divided by minRedZone"); - - // Create new type for global with redzones - fieldVector.clear(); - parentFileds.clear(); - CHECK_FATAL(rightRedzoneSize < UINT32_MAX, "Too large redzone size."); - MIRArrayType *rightRe dZoneTy = GlobalTables::GetTypeTable().GetOrCreateArrayType( - 
*GlobalTables::GetTypeTable().GetInt8(), rightRedzoneSize); - GlobalTables::GetTypeTable().PushIntoFieldVector(fieldVector, "orig", *global->GetType()); - GlobalTables::GetTypeTable().PushIntoFieldVector(fieldVector, "redzone", *rightRedZoneTy); - MIRStructType *newGlobalType = static_cast( - GlobalTables::GetTypeTable().GetOrCreateStructType( - "NewGlobal_" + global->GetName(), fieldVector, parentFileds, *module)); - - // Create new variable for global with redzones - MIRSymbol *newGlobalVar = module->GetMIRBuilder()->CreateSymbol( - newGlobalType->GetTypeIndex(), "", global->GetSKind(), - global->GetStorageClass(), nullptr, kScopeGlobal); - - // Initialize the new global - MIRAggConst *newGlobalConst = module->GetMemPool()-> - New(*module, *newGlobalVar->GetType()); - // Initialize the field orig - MIRConst *globalConst = global->GetKonst(); - MIRConst *globalConstClone; - if (globalConst->GetKind() == kConstInt) { - globalConstClone = GlobalTables::GetIntConstTable().GetOrCreateIntConst( - static_cast(globalConst)->GetValue(), globalConst->GetType()); - } else { - globalConstClone = globalConst->Clone(*module->GetMemPool()); - } - newGlobalConst->AddItem(globalConstClone, 1); - // Initialize the field redzone - MIRAggConst *arrayConst = module->GetMemPool()->New(*module, *rightRedZoneTy); - for (size_t j = 0; j < rightRedzoneSize; j++) { - arrayConst->AddItem(GlobalTables::GetIntConstTable().GetOrCreateIntConst( - 0, *GlobalTables::GetTypeTable().GetInt8()), 0); - } + fieldVector.clear(); + parentFileds.clear(); + CHECK_FATAL(rightRedzoneSize < UINT32_MAX, "Too large redzone size."); + MIRArrayType *rightRedZoneTy = GlobalTables::GetTypeTable().GetOrCreateArrayType( + *GlobalTables::GetTypeTable().GetInt8(), uint32_t(rightRedzoneSize)); + GlobalTables::GetTypeTable().PushIntoFieldVector(fieldVector, "orig", *global->GetType()); + GlobalTables::GetTypeTable().PushIntoFieldVector(fieldVector, "redzone", *rightRedZoneTy); + MIRStructType *newGlobalType = 
static_cast(GlobalTables::GetTypeTable().GetOrCreateStructType( + "NewGlobal_" + global->GetName(), fieldVector, parentFileds, *module)); - newGlobalConst->AddItem(arrayConst, 2); - // Set the initialized value to - newGlobalVar->SetKonst(newGlobalConst); - // Make the new created one the same as the old global variable - newGlobalVar->SetAttrs(global->GetAttrs()); - newGlobalVar->SetNameStrIdx(global->GetName()); - // Set source location - newGlobalVar->SetSrcPosition(global->GetSrcPosition()); - - // replace global variable field Id - for (MIRSymbol *mirSymbol: symbolUsedInInit[newGlobalVar->GetName()]) { - MIRAddrofConst *mirAddrofConst = dynamic_cast(mirSymbol->GetKonst()); - MIRAddrofConst *newAddrofConst = module->GetMemPool()->New( - mirAddrofConst->GetSymbolIndex(), 1, mirAddrofConst->GetType()); - mirSymbol->SetKonst(newAddrofConst); - } - // replace statement field Id - for (BaseNode *stmtNode: symbolUsedInStmt[newGlobalVar->GetName()]) { - switch (stmtNode->GetOpCode()) { - case OP_dassign: { - DassignNode *dassignNode = dynamic_cast(stmtNode); - dassignNode->SetStIdx(newGlobalVar->GetStIdx()); - dassignNode->SetFieldID(1 + dassignNode->GetFieldID()); - break; - } - case OP_dread: - case OP_addrof: { - AddrofNode *addrofNode = dynamic_cast(stmtNode); - addrofNode->SetStIdx(newGlobalVar->GetStIdx()); - addrofNode->SetFieldID(1 + addrofNode->GetFieldID()); - break; - } - case OP_callassigned: { - CallNode *callNode = dynamic_cast(stmtNode); - CallReturnVector &callRet = callNode->GetReturnVec(); - for (size_t j = 0; j < callRet.size(); j++) { - StIdx idx = callRet[j].first; - RegFieldPair regFieldPair = callRet[j].second; - if (!regFieldPair.IsReg()) { - if (idx == global->GetStIdx()) { - callRet[j].first = newGlobalVar->GetStIdx(); - callRet[j].second.SetFieldID(1 + callRet[j].second.GetFieldID()); - } + // Create new variable for global with redzones + MIRSymbol *newGlobalVar = module->GetMIRBuilder()->CreateSymbol( + newGlobalType->GetTypeIndex(), "", 
global->GetSKind(), global->GetStorageClass(), nullptr, kScopeGlobal); + + // Initialize the new global + MIRAggConst *newGlobalConst = module->GetMemPool()->New(*module, *newGlobalVar->GetType()); + // Initialize the field orig + MIRConst *globalConst = global->GetKonst(); + MIRConst *globalConstClone; + if (globalConst->GetKind() == kConstInt) { + globalConstClone = GlobalTables::GetIntConstTable().GetOrCreateIntConst( + static_cast(globalConst)->GetValue(), globalConst->GetType()); + } else { + globalConstClone = globalConst->Clone(*module->GetMemPool()); + } + newGlobalConst->AddItem(globalConstClone, 1); + // Initialize the field redzone + MIRAggConst *arrayConst = module->GetMemPool()->New(*module, *rightRedZoneTy); + for (size_t j = 0; j < rightRedzoneSize; j++) { + arrayConst->AddItem( + GlobalTables::GetIntConstTable().GetOrCreateIntConst(0, *GlobalTables::GetTypeTable().GetInt8()), 0); + } + + newGlobalConst->AddItem(arrayConst, 2); + // Set the initialized value to + newGlobalVar->SetKonst(newGlobalConst); + // Make the new created one the same as the old global variable + newGlobalVar->SetAttrs(global->GetAttrs()); + newGlobalVar->SetNameStrIdx(global->GetName()); + // Set source location + newGlobalVar->SetSrcPosition(global->GetSrcPosition()); + + // replace global variable field Id + for (MIRSymbol *mirSymbol : symbolUsedInInit[newGlobalVar->GetName()]) { + MIRAddrofConst *mirAddrofConst = dynamic_cast(mirSymbol->GetKonst()); + MIRAddrofConst *newAddrofConst = + module->GetMemPool()->New(mirAddrofConst->GetSymbolIndex(), 1, mirAddrofConst->GetType()); + mirSymbol->SetKonst(newAddrofConst); + } + // replace statement field Id + for (BaseNode *stmtNode : symbolUsedInStmt[newGlobalVar->GetName()]) { + switch (stmtNode->GetOpCode()) { + case OP_dassign: { + DassignNode *dassignNode = dynamic_cast(stmtNode); + dassignNode->SetStIdx(newGlobalVar->GetStIdx()); + dassignNode->SetFieldID(1 + dassignNode->GetFieldID()); + break; + } + case OP_dread: + case 
OP_addrof: { + AddrofNode *addrofNode = dynamic_cast(stmtNode); + addrofNode->SetStIdx(newGlobalVar->GetStIdx()); + addrofNode->SetFieldID(1 + addrofNode->GetFieldID()); + break; + } + case OP_callassigned: { + CallNode *callNode = dynamic_cast(stmtNode); + CallReturnVector &callRet = callNode->GetReturnVec(); + for (size_t j = 0; j < callRet.size(); j++) { + StIdx idx = callRet[j].first; + RegFieldPair regFieldPair = callRet[j].second; + if (!regFieldPair.IsReg()) { + if (idx == global->GetStIdx()) { + callRet[j].first = newGlobalVar->GetStIdx(); + callRet[j].second.SetFieldID(1 + callRet[j].second.GetFieldID()); } } - break; - } - default: { } + break; + } + default: { } } - global->SetIsDeleted(); - newGlobalVar->ResetIsDeleted(); - // Create a new variable and construct its initial value - MIRAggConst *initializer = module->GetMemPool()->New(*module, *globalStructForInitTy); - - // begin - MIRAddrofConst *beginConst = createAddrofConst(*module, newGlobalVar, IntPtrPrim); - initializer->AddItem(beginConst, 1); - // size - MIRIntConst *sizeInBytesConst = GlobalTables::GetIntConstTable(). - GetOrCreateIntConst(sizeInBytes, *IntPtrTy); - initializer->AddItem(sizeInBytesConst, 2); - // size with redzone - MIRIntConst *sizeWithRedzoneConst = GlobalTables::GetIntConstTable(). 
- GetOrCreateIntConst(sizeInBytes + rightRedzoneSize, *IntPtrTy); - initializer->AddItem(sizeWithRedzoneConst, 3); - // variable name - MIRStrConst *nameConst = createStringConst(*module, newGlobalVar->GetName(), PTY_a64); - initializer->AddItem(nameConst, 4); - // module name - MIRStrConst *moduleNameConst = createStringConst(*module, module->GetFileName(), PTY_a64); - initializer->AddItem(moduleNameConst, 5); - // isDynInit - MIRIntConst *isDynInit = GlobalTables::GetIntConstTable().GetOrCreateIntConst(0, *IntPtrTy); - initializer->AddItem(isDynInit, 6); - // Set source location - MIRConst *sourceLocConst = createSourceLocConst(*module, newGlobalVar, IntPtrPrim); - initializer->AddItem(sourceLocConst, 7); - // Set OdrIndicator - MIRConst *odrIndicator = GlobalTables::GetIntConstTable().GetOrCreateIntConst(0, *IntPtrTy); - initializer->AddItem(odrIndicator, 8); - // Set the value of initializer - LogInfo::MapleLogger() << "NEW GLOBAL: " << newGlobalVar->GetName() << "\n"; - newGlobals[i] = newGlobalVar; - initializers[i] = initializer; } - InstrumentGlobalsWithMetadataArray(ctorToBeInserted, newGlobals, initializers); + global->SetIsDeleted(); + newGlobalVar->ResetIsDeleted(); + // Create a new variable and construct its initial value + MIRAggConst *initializer = module->GetMemPool()->New(*module, *globalStructForInitTy); + + // begin + MIRAddrofConst *beginConst = createAddrofConst(*module, newGlobalVar, IntPtrPrim); + initializer->AddItem(beginConst, 1); + // size + MIRIntConst *sizeInBytesConst = GlobalTables::GetIntConstTable().GetOrCreateIntConst(sizeInBytes, *IntPtrTy); + initializer->AddItem(sizeInBytesConst, 2); + // size with redzone + MIRIntConst *sizeWithRedzoneConst = + GlobalTables::GetIntConstTable().GetOrCreateIntConst(sizeInBytes + rightRedzoneSize, *IntPtrTy); + initializer->AddItem(sizeWithRedzoneConst, 3); + // variable name + MIRStrConst *nameConst = createStringConst(*module, newGlobalVar->GetName(), PTY_a64); + initializer->AddItem(nameConst, 
4); + // module name + MIRStrConst *moduleNameConst = createStringConst(*module, module->GetFileName(), PTY_a64); + initializer->AddItem(moduleNameConst, 5); + // isDynInit + MIRIntConst *isDynInit = GlobalTables::GetIntConstTable().GetOrCreateIntConst(0, *IntPtrTy); + initializer->AddItem(isDynInit, 6); + // Set source location + MIRConst *sourceLocConst = createSourceLocConst(*module, newGlobalVar, IntPtrPrim); + initializer->AddItem(sourceLocConst, 7); + // Set OdrIndicator + MIRConst *odrIndicator = GlobalTables::GetIntConstTable().GetOrCreateIntConst(0, *IntPtrTy); + initializer->AddItem(odrIndicator, 8); + // Set the value of initializer + LogInfo::MapleLogger() << "NEW GLOBAL: " << newGlobalVar->GetName() << "\n"; + newGlobals[i] = newGlobalVar; + initializers[i] = initializer; + } + InstrumentGlobalsWithMetadataArray(ctorToBeInserted, newGlobals, initializers); + return false; +} + +bool ModuleAddressSanitizer::ShouldInstrumentGlobal(MIRSymbol *var) { + MIRType *type = GlobalTables::GetTypeTable().GetTypeFromTyIdx(var->GetTyIdx()); + if (type == nullptr) { + return false; + } + if (!isTypeSized(type)) { + return false; + } + if (var->GetValue().konst == nullptr) { + return false; + } + if (type->GetAlign() > MinRedzoneSizeForGlobal()) { return false; } + return true; +} - bool ModuleAddressSanitizer::ShouldInstrumentGlobal(MIRSymbol *var) { - MIRType *type = GlobalTables::GetTypeTable().GetTypeFromTyIdx(var->GetTyIdx()); - if (type == nullptr) { - return false; - } - if (!isTypeSized(type)) { - return false; - } - if (var->GetValue().konst == nullptr) { - return false; - } - if (type->GetAlign() > MinRedzoneSizeForGlobal()) { - return false; - } - return true; +void ModuleAddressSanitizer::InstrumentGlobalsWithMetadataArray(BlockNode *ctorToBeInserted, + const std::vector ExtendedGlobals, + std::vector MetadataInitializers) { + assert(ExtendedGlobals.size() == MetadataInitializers.size()); + size_t N = ExtendedGlobals.size(); + CHECK_FATAL(N > 0, "Zero size 
extended globals."); + MIRArrayType *arrayOfGlobalStructTy = + GlobalTables::GetTypeTable().GetOrCreateArrayType(MetadataInitializers[0]->GetType(), N); + MIRAggConst *allGlobalsConst = module->GetMemPool()->New(*module, *arrayOfGlobalStructTy); + for (MIRConst *meta : MetadataInitializers) { + allGlobalsConst->PushBack(meta); } - void ModuleAddressSanitizer::InstrumentGlobalsWithMetadataArray( - BlockNode *ctorToBeInserted, - const std::vector ExtendedGlobals, - std::vector MetadataInitializers) { - assert(ExtendedGlobals.size() == MetadataInitializers.size()); - size_t N = ExtendedGlobals.size(); - CHECK_FATAL(N > 0, "Zero size extended globals."); - MIRArrayType *arrayOfGlobalStructTy = GlobalTables::GetTypeTable().GetOrCreateArrayType( - MetadataInitializers[0]->GetType(), N); - MIRAggConst *allGlobalsConst = module->GetMemPool()->New(*module, *arrayOfGlobalStructTy); - for (MIRConst *meta: MetadataInitializers) { - allGlobalsConst->PushBack(meta); - } + MIRSymbol *allGlobalsVar = module->GetMIRBuilder()->CreateSymbol(arrayOfGlobalStructTy->GetTypeIndex(), "allGlobals", + kStConst, kScFstatic, nullptr, kScopeGlobal); + allGlobalsVar->SetKonst(allGlobalsConst); + MapleVector registerGlobal(module->GetMPAllocator().Adapter()); + AddrofNode *addrofNode = module->GetMIRBuilder()->CreateAddrof(*allGlobalsVar, IntPtrPrim); + ConstvalNode *constvalNode = module->GetMIRBuilder()->CreateIntConst(N, IntPtrPrim); + registerGlobal.emplace_back(addrofNode); + registerGlobal.emplace_back(constvalNode); + CallNode *registerCallNode = module->GetMIRBuilder()->CreateStmtCall(AsanRegisterGlobals->GetPuidx(), registerGlobal); + ctorToBeInserted->InsertBefore(ctorToBeInserted->GetLast(), registerCallNode); + BlockNode *dtorTobeInserted = CreateModuleDtor(); + // We also need to unregister globals at the end, e.g., when a shared library + // gets closed. 
+ CallNode *unRegisterCallNode = + module->GetMIRBuilder()->CreateStmtCall(AsanUnregisterGlobals->GetPuidx(), registerGlobal); + dtorTobeInserted->InsertBefore(dtorTobeInserted->GetLast(), unRegisterCallNode); +} - MIRSymbol *allGlobalsVar = module->GetMIRBuilder()->CreateSymbol( - arrayOfGlobalStructTy->GetTypeIndex(), "allGlobals", kStConst, kScFstatic, nullptr, kScopeGlobal); - allGlobalsVar->SetKonst(allGlobalsConst); - MapleVector registerGlobal(module->GetMPAllocator().Adapter()); - AddrofNode *addrofNode = module->GetMIRBuilder()->CreateAddrof(*allGlobalsVar, IntPtrPrim); - ConstvalNode *constvalNode = module->GetMIRBuilder()->CreateIntConst(N, IntPtrPrim); - registerGlobal.emplace_back(addrofNode); - registerGlobal.emplace_back(constvalNode); - CallNode *registerCallNode = module->GetMIRBuilder()->CreateStmtCall( - AsanRegisterGlobals->GetPuidx(), registerGlobal); - ctorToBeInserted->InsertBefore(ctorToBeInserted->GetLast(), registerCallNode); - BlockNode *dtorTobeInserted = CreateModuleDtor(); - // We also need to unregister globals at the end, e.g., when a shared library - // gets closed. 
- CallNode *unRegisterCallNode = module->GetMIRBuilder()->CreateStmtCall( - AsanUnregisterGlobals->GetPuidx(), registerGlobal); - dtorTobeInserted->InsertBefore(dtorTobeInserted->GetLast(), unRegisterCallNode); - } +BlockNode *ModuleAddressSanitizer::CreateCtorAndInitFunctions(const std::string CtorName, const std::string InitName, + const MapleVector InitArgs) { + MIRBuilder *mirBuilder = module->GetMIRBuilder(); + ArgVector args(module->GetMPAllocator().Adapter()); + AsanCtorFunction = mirBuilder->CreateFunction(CtorName, *GlobalTables::GetTypeTable().GetVoid(), args); + module->AddFunction(AsanCtorFunction); + AsanCtorFunction->SetAttr(FUNCATTR_local); + BlockNode *asanCtorBlock = AsanCtorFunction->GetBody(); + StmtNode *retNode = mirBuilder->CreateStmtReturn(nullptr); + asanCtorBlock->AddStatement(retNode); - BlockNode *ModuleAddressSanitizer::CreateCtorAndInitFunctions( - const std::string CtorName, const std::string InitName, const MapleVector InitArgs) { - MIRBuilder *mirBuilder = module->GetMIRBuilder(); - ArgVector args(module->GetMPAllocator().Adapter()); - AsanCtorFunction = mirBuilder->CreateFunction(CtorName, *GlobalTables::GetTypeTable().GetVoid(), args); - module->AddFunction(AsanCtorFunction); - AsanCtorFunction->SetAttr(FUNCATTR_local); - BlockNode *asanCtorBlock = AsanCtorFunction->GetBody(); - StmtNode *retNode = mirBuilder->CreateStmtReturn(nullptr); - asanCtorBlock->AddStatement(retNode); - - MIRFunction *initFunction = getOrInsertFunction(mirBuilder, InitName.c_str(), - GlobalTables::GetTypeTable().GetVoid(), {}); - CallNode *callInitNode = mirBuilder->CreateStmtCall(initFunction->GetPuidx(), InitArgs); - - asanCtorBlock->InsertBefore(retNode, callInitNode); - return asanCtorBlock; - } + MIRFunction *initFunction = + getOrInsertFunction(mirBuilder, InitName.c_str(), GlobalTables::GetTypeTable().GetVoid(), {}); + CallNode *callInitNode = mirBuilder->CreateStmtCall(initFunction->GetPuidx(), InitArgs); - BlockNode 
*ModuleAddressSanitizer::CreateModuleDtor() { - MIRBuilder *mirBuilder = module->GetMIRBuilder(); - ArgVector args(module->GetMPAllocator().Adapter()); - AsanDtorFunction = mirBuilder->CreateFunction(kAsanModuleDtorName, - *GlobalTables::GetTypeTable().GetVoid(), args); - module->AddFunction(AsanDtorFunction); - AsanDtorFunction->SetAttr(FUNCATTR_local); - BlockNode *asanDtorBlock = AsanDtorFunction->GetBody(); - StmtNode *retNode = mirBuilder->CreateStmtReturn(nullptr); - asanDtorBlock->AddStatement(retNode); - - return asanDtorBlock; - } - - void ModuleAddressSanitizer::GetGlobalSymbolUsage() { - // Replace all old global users with new global - for (MIRFunction *func : module->GetFunctionList()) { - if (func == nullptr || func->GetBody() == nullptr) { - continue; - } - std::stack baseNodeStack; - StmtNodes &stmtNodes = func->GetBody()->GetStmtNodes(); - for (StmtNode &stmt : stmtNodes) { - baseNodeStack.push(&stmt); - } + asanCtorBlock->InsertBefore(retNode, callInitNode); + return asanCtorBlock; +} - while (!baseNodeStack.empty()) { - BaseNode *baseNode = baseNodeStack.top(); - baseNodeStack.pop(); - switch (baseNode->GetOpCode()) { - case OP_dassign: { - DassignNode *dassignNode = dynamic_cast(baseNode); - MIRSymbol *mirSymbol = func->GetLocalOrGlobalSymbol(dassignNode->GetStIdx()); - if (mirSymbol->IsGlobal()) { - if (symbolUsedInStmt.count(mirSymbol->GetName()) == 0) { - symbolUsedInStmt[mirSymbol->GetName()] = {}; - } - symbolUsedInStmt[mirSymbol->GetName()].insert(dassignNode); +BlockNode *ModuleAddressSanitizer::CreateModuleDtor() { + MIRBuilder *mirBuilder = module->GetMIRBuilder(); + ArgVector args(module->GetMPAllocator().Adapter()); + AsanDtorFunction = mirBuilder->CreateFunction(kAsanModuleDtorName, *GlobalTables::GetTypeTable().GetVoid(), args); + module->AddFunction(AsanDtorFunction); + AsanDtorFunction->SetAttr(FUNCATTR_local); + BlockNode *asanDtorBlock = AsanDtorFunction->GetBody(); + StmtNode *retNode = mirBuilder->CreateStmtReturn(nullptr); + 
asanDtorBlock->AddStatement(retNode); + + return asanDtorBlock; +} + +void ModuleAddressSanitizer::GetGlobalSymbolUsage() { + // Replace all old global users with new global + for (MIRFunction *func : module->GetFunctionList()) { + if (func == nullptr || func->GetBody() == nullptr) { + continue; + } + std::stack baseNodeStack; + StmtNodes &stmtNodes = func->GetBody()->GetStmtNodes(); + for (StmtNode &stmt : stmtNodes) { + baseNodeStack.push(&stmt); + } + + while (!baseNodeStack.empty()) { + BaseNode *baseNode = baseNodeStack.top(); + baseNodeStack.pop(); + switch (baseNode->GetOpCode()) { + case OP_dassign: { + DassignNode *dassignNode = dynamic_cast(baseNode); + MIRSymbol *mirSymbol = func->GetLocalOrGlobalSymbol(dassignNode->GetStIdx()); + if (mirSymbol->IsGlobal()) { + if (symbolUsedInStmt.count(mirSymbol->GetName()) == 0) { + symbolUsedInStmt[mirSymbol->GetName()] = {}; } - break; + symbolUsedInStmt[mirSymbol->GetName()].insert(dassignNode); } - case OP_dread: - case OP_addrof: { - AddrofNode *addrofNode = dynamic_cast(baseNode); - MIRSymbol *mirSymbol = func->GetLocalOrGlobalSymbol(addrofNode->GetStIdx()); - if (mirSymbol->IsGlobal()) { - if (symbolUsedInStmt.count(mirSymbol->GetName()) == 0) { - symbolUsedInStmt[mirSymbol->GetName()] = {}; - } - symbolUsedInStmt[mirSymbol->GetName()].insert(addrofNode); + break; + } + case OP_dread: + case OP_addrof: { + AddrofNode *addrofNode = dynamic_cast(baseNode); + MIRSymbol *mirSymbol = func->GetLocalOrGlobalSymbol(addrofNode->GetStIdx()); + if (mirSymbol->IsGlobal()) { + if (symbolUsedInStmt.count(mirSymbol->GetName()) == 0) { + symbolUsedInStmt[mirSymbol->GetName()] = {}; } - break; + symbolUsedInStmt[mirSymbol->GetName()].insert(addrofNode); } - case OP_callassigned: { - CallNode *callNode = dynamic_cast(baseNode); - CallReturnVector &callRet = callNode->GetReturnVec(); - for (size_t i = 0; i < callRet.size(); i++) { - StIdx idx = callRet[i].first; - RegFieldPair regFieldPair = callRet[i].second; - if 
(!regFieldPair.IsReg()) { - MIRSymbol *mirSymbol = func->GetLocalOrGlobalSymbol(idx); - if (mirSymbol->IsGlobal()) { - if (symbolUsedInStmt.count(mirSymbol->GetName()) == 0) { - symbolUsedInStmt[mirSymbol->GetName()] = {}; - } - symbolUsedInStmt[mirSymbol->GetName()].insert(callNode); + break; + } + case OP_callassigned: { + CallNode *callNode = dynamic_cast(baseNode); + CallReturnVector &callRet = callNode->GetReturnVec(); + for (size_t i = 0; i < callRet.size(); i++) { + StIdx idx = callRet[i].first; + RegFieldPair regFieldPair = callRet[i].second; + if (!regFieldPair.IsReg()) { + MIRSymbol *mirSymbol = func->GetLocalOrGlobalSymbol(idx); + if (mirSymbol->IsGlobal()) { + if (symbolUsedInStmt.count(mirSymbol->GetName()) == 0) { + symbolUsedInStmt[mirSymbol->GetName()] = {}; } + symbolUsedInStmt[mirSymbol->GetName()].insert(callNode); } } - break; } - default: - break; - } - for (size_t j = 0; j < baseNode->NumOpnds(); j++) { - baseNodeStack.push(baseNode->Opnd(j)); + break; } + default: + break; + } + for (size_t j = 0; j < baseNode->NumOpnds(); j++) { + baseNodeStack.push(baseNode->Opnd(j)); } } - for (MIRSymbol *mirSymbol: GetGlobalVaribles(*module)) { - if (mirSymbol->GetKonst()) { - MIRConst *mirConst = mirSymbol->GetKonst(); - if (mirConst->GetKind() == kConstAddrof) { - MIRAddrofConst *mirAddrofConst = dynamic_cast(mirConst); - MIRSymbol *mirSymbolUsed = GlobalTables::GetGsymTable().GetSymbolFromStidx(mirAddrofConst->GetSymbolIndex().Idx()); - if (symbolUsedInInit.count(mirSymbolUsed->GetName()) == 0) { - symbolUsedInInit[mirSymbolUsed->GetName()] = {}; - } - symbolUsedInInit[mirSymbolUsed->GetName()].insert(mirSymbol); + } + for (MIRSymbol *mirSymbol : GetGlobalVaribles(*module)) { + if (mirSymbol->GetKonst()) { + MIRConst *mirConst = mirSymbol->GetKonst(); + if (mirConst->GetKind() == kConstAddrof) { + MIRAddrofConst *mirAddrofConst = dynamic_cast(mirConst); + MIRSymbol *mirSymbolUsed = + 
GlobalTables::GetGsymTable().GetSymbolFromStidx(mirAddrofConst->GetSymbolIndex().Idx()); + if (symbolUsedInInit.count(mirSymbolUsed->GetName()) == 0) { + symbolUsedInInit[mirSymbolUsed->GetName()] = {}; } + symbolUsedInInit[mirSymbolUsed->GetName()].insert(mirSymbol); } } } } +} // namespace maple #endif \ No newline at end of file diff --git a/src/mapleall/maple_san/src/asan_neighbor_opt.cpp b/src/mapleall/maple_san/src/asan_neighbor_opt.cpp new file mode 100644 index 0000000000000000000000000000000000000000..c7b392ec7d7fded16258e5c5a965cecf360ec231 --- /dev/null +++ b/src/mapleall/maple_san/src/asan_neighbor_opt.cpp @@ -0,0 +1,484 @@ +#ifdef ENABLE_MAPLE_SAN + +#include + +#include "asan_phases.h" + +namespace maple { + +// we need to handle SSANode +AddrofNode *MEDoNeighborOpt::getAddrofNode(BaseNode *baseNode) { + SSANode *ssaCheck = dynamic_cast(baseNode); + if (ssaCheck != nullptr) + return dynamic_cast(dynamic_cast(baseNode)->GetNoSSANode()); + else + return dynamic_cast(baseNode); +} + +uint32 MEDoNeighborOpt::getOffset(BaseNode *baseNode) { + uint32 offset = 0; + switch (baseNode->GetOpCode()) { + case OP_iassign: { + IassignNode *iassign = dynamic_cast(baseNode); + MIRPtrType *ptrType = + dynamic_cast(GlobalTables::GetTypeTable().GetTypeFromTyIdx(iassign->GetTyIdx())); + MIRType *pointedType = ptrType->GetPointedType(); + if (pointedType->IsMIRStructType()) { + offset = getStructOffset(iassign, ptrType, iassign->GetFieldID()); + } else { + offset = getArrayOffset(iassign, ptrType, iassign->GetFieldID()); + } + + break; + } + case OP_iread: { + IreadNode *iread = dynamic_cast(baseNode); + CHECK_FATAL((iread != nullptr), "Invalid IR node with OpCode OP_iread."); + MIRPtrType *ptrType = + dynamic_cast(GlobalTables::GetTypeTable().GetTypeFromTyIdx(iread->GetTyIdx())); + MIRType *pointedType = ptrType->GetPointedType(); + if (pointedType->IsMIRStructType()) { + offset = getStructOffset(iread, ptrType, iread->GetFieldID()); + } else { + offset = 
getArrayOffset(iread, ptrType, iread->GetFieldID()); + } + break; + } + default: { + } + } + + return offset; +} + +uint32 MEDoNeighborOpt::getStructOffset(BaseNode *node, MIRPtrType *ptrType, FieldID fieldID) { + MIRType* pointedType = ptrType->GetPointedType(); + if (fieldID == 0) { + uint32 size = pointedType->GetSize(); + this->node2size[node] = size; + this->node2offset[node] = 0; + return 0; + } else { + MIRStructType* tmp = dynamic_cast(pointedType); + CHECK_FATAL((tmp != nullptr), "Unknown type with fieldID access."); + uint32 offset = tmp->GetFieldOffsetFromBaseAddr(fieldID).byteOffset; + this->node2offset[node] = offset; + MIRType *mirtype = GlobalTables::GetTypeTable().GetTypeFromTyIdx(tmp->GetFieldTyIdx(fieldID)); + this->node2size[node] = mirtype->GetSize(); + return offset; + } +} + +uint32 MEDoNeighborOpt::getArrayOffset(BaseNode *node, MIRPtrType *ptrType, FieldID fieldID) { + BaseNode *opnd = node->Opnd(0); + // opnd->Dump(); + if (opnd->GetOpCode() == OP_add) { + // pattern: base + offset + BaseNode *addrExpr = opnd->Opnd(0); + BaseNode *offsetExpr = opnd->Opnd(1); + if (addrExpr->GetOpCode() == OP_dread && offsetExpr->GetOpCode() == OP_constval) { + ConstvalNode *offsetNode = dynamic_cast(offsetExpr); + if (offsetNode == nullptr) return -1; + MIRIntConst *intConst = dynamic_cast(offsetNode->GetConstVal()); + if (intConst == nullptr) return -1; + uint32 offset = intConst->GetExtValue(); + + MIRType *pointedType = ptrType->GetPointedType(); + size_t size = pointedType->GetSize(); + this->node2size[node] = size; + this->node2offset[node] = offset; + // LogInfo::MapleLogger() << "getArrayOffset\n"; + // node->Dump(); + // LogInfo::MapleLogger() << "size: " << size << " offset: " << offset << "\n"; + return offset; + } + } + return 0xffffffff; +} + +void MEDoNeighborOpt::DumpIntermediateResult() { + std::map>::iterator itr; + for (itr = this->def2nodes.begin(); itr != this->def2nodes.end(); ++itr) { + const StmtNode *defStmt = itr->first; + 
std::vector targetNodes = itr->second; + LogInfo::MapleLogger() << "defStmt:\n"; + defStmt->Dump(); + LogInfo::MapleLogger() << "use Nodes:\n"; + for (auto *useNode : targetNodes) { + useNode->Dump(); + LogInfo::MapleLogger() << "size: " << this->node2size[useNode] << "\n"; + LogInfo::MapleLogger() << "offset: " << this->node2offset[useNode] << "\n"; + } + } +} + +void MEDoNeighborOpt::DumpResult() { + std::vector::iterator itr; + for (itr = this->results.begin(); itr != this->results.end(); ++itr) { + SrcPosition stmtPos = itr->stmtPos; + std::vector directions = itr->directions; + Opcode op = itr->op; + int64_t newSize = itr->newSize; + + LogInfo::MapleLogger() << "stmtPos:\n"; + stmtPos.Dump(); + LogInfo::MapleLogger() << "directions:\n"; + for (auto dir : directions) { + LogInfo::MapleLogger() << dir << " "; + } + LogInfo::MapleLogger() << "\n"; + LogInfo::MapleLogger() << "op: " << int(op) << "\n"; + LogInfo::MapleLogger() << "newSize: " << newSize << "\n"; + } +} + +std::vector> MEDoNeighborOpt::sortOffset() { + std::vector> sortVec; + for (auto &it : this->node2offset) { + sortVec.push_back(it); + } + std::sort(sortVec.begin(), sortVec.end(), this->cmp); + return sortVec; +} + +void MEDoNeighborOpt::NeighborOptEntry(BB *bb) { + this->node2offset.clear(); + this->node2size.clear(); + this->node2def.clear(); + this->def2nodes.clear(); + + for (StmtNode &stmt : bb->GetStmtNodes()) { + /* cannot mark on current function directly, + because [irmapbuild + premeemi] phases will rebuild (optimize) the function. 
+ have no choice but a StmtNode level granularity optimization + */ + + // visit all node, to find def stmt node + std::vector directions; + TraverseNodes(&stmt, &stmt, directions); + } + + // filter and update result + filterRemovableMemAccesses(); +} + +void MEDoNeighborOpt::TraverseNodes(StmtNode *stmt, BaseNode *baseNode, std::vector curDirections) { + // LogInfo::MapleLogger() << "MEDoNeighborOpt::TraverseNodes \n"; + // stmt->Dump(); + // baseNode->Dump(2); + // LogInfo::MapleLogger() << "\n"; + // SrcPosition srcPos = stmt->GetSrcPos(); + // srcPos.Dump(); + if (baseNode != nullptr) { + BaseNode *mapKey = baseNode; + if (baseNode->IsSSANode()) { + mapKey = dynamic_cast(baseNode)->GetNoSSANode(); + } + this->node2stmt[mapKey] = stmt; + this->node2directions[mapKey] = curDirections; + + switch (baseNode->GetOpCode()) { + case OP_iassign: { + IassignNode *iassign = dynamic_cast(baseNode); + CHECK_FATAL((iassign != nullptr), "Invalid IR node with OpCode OP_iassign"); + BaseNode *addrExpr = iassign->Opnd(0); + + // struct pattern: we want to see where the addr-expr comes from + auto *addrofSSANode = dynamic_cast(addrExpr); + + // array pattern: base ptr + offset + if (addrExpr->GetOpCode() == OP_add) { + addrExpr = addrExpr->Opnd(0); + // we want to see where the addr-expr comes from + addrofSSANode = dynamic_cast(addrExpr); + } + + if (addrofSSANode == nullptr) break; // without def-use info, or not an addrof node + + // Collect information + CHECK_FATAL((addrofSSANode != nullptr), "not a valid SSA Node"); + const VersionSt *verSt = addrofSSANode->GetSSAVar(); + CHECK_FATAL((verSt != nullptr), "not a valid SSA Var"); + findDefStmt(iassign, *verSt); + getOffset(iassign); + + break; + } + case OP_iassignoff: { + IassignoffNode *iassignoff = dynamic_cast(baseNode); + CHECK_FATAL((iassignoff != nullptr), "Invalid IR node with OpCode OP_iassignoff"); + // TODO + // struct MemoryAccess memoryAccess = getIassignoffMemoryAccess(*iassignoff); + + // // the rhs-expr can 
still read from somewhere, push it to stack + // baseNodeStack.push(iassignoff->GetBOpnd(1)); + break; + } + case OP_iassignfpoff: + case OP_iassignpcoff: + break; + case OP_iread: { + IreadNode *iread = nullptr; + if (baseNode->IsSSANode()) { + iread = dynamic_cast(dynamic_cast(baseNode)->GetNoSSANode()); + } else { + iread = dynamic_cast(baseNode); + } + CHECK_FATAL((iread != nullptr), "Invalid IR node with OpCode OP_iread."); + + BaseNode *addrExpr = iread->Opnd(0); + + // struct pattern: we want to see where the addr-expr comes from + auto *addrofSSANode = dynamic_cast(addrExpr); + + // pattern: base ptr + offset + if (addrExpr->GetOpCode() == OP_add) { + addrExpr = addrExpr->Opnd(0); + // we want to see where the addr-expr comes from + addrofSSANode = dynamic_cast(addrExpr); + } + + if (addrofSSANode == nullptr) break; // without def-use info + + // Collect information + const VersionSt *verSt = addrofSSANode->GetSSAVar(); + CHECK_FATAL((verSt != nullptr), "not a valid SSA Var"); + findDefStmt(iread, *verSt); + getOffset(iread); + + break; + } + case OP_ireadoff: + case OP_ireadfpoff: + case OP_ireadpcoff: + break; + case OP_block: { + CHECK_FATAL((false), "OP_block should not be visited in MEDoNeighborOpt"); + break; + } + default: { + } + } + + for (size_t j = 0; j < baseNode->NumOpnds(); ++j) { + if (baseNode->GetOpCode() == OP_return) continue; + std::vector newDirections; + std::copy(curDirections.begin(), curDirections.end(), back_inserter(newDirections)); + newDirections.push_back(j); + + TraverseNodes(stmt, baseNode->Opnd(j), newDirections); + } + } +} + +const StmtNode *MEDoNeighborOpt::findDefStmt(BaseNode *baseNode, const VersionSt &vst) { + // LogInfo::MapleLogger() << "Neighbor Opt: findDefStmt: "; + // vst.Dump(); + // LogInfo::MapleLogger() << "\nNeighbor Opt: dump def stmt, type: "; + + const StmtNode *defStmt; + if (vst.GetDefType() == VersionSt::kAssign) { + // LogInfo::MapleLogger() << "kAssign\n"; + // vst.DumpDefStmt(); + const StmtNode 
*assign = vst.GetAssignNode(); + defStmt = assign; + } else if (vst.GetDefType() == VersionSt::kPhi) { + // LogInfo::MapleLogger() << "kPhi\n"; + // vst.DumpDefStmt(); + defStmt = nullptr; + // PhiNode *phi = vst.GetPhi(); + // ASSERT(phi->GetResult() == &vst, "MarkVst: wrong corresponding version st in phi"); + // MarkControlDependenceLive(ToRef(dfBB)); + // for (size_t i = 0; i < phi->GetPhiOpnds().size(); ++i) { + // const VersionSt *verSt = phi->GetPhiOpnds()[i]; + // AddToWorkList(verSt); + // } + // phi->SetIsLive(true); + } else if (vst.GetDefType() == VersionSt::kMayDef) { + // LogInfo::MapleLogger() << "kMayDef\n"; + // vst.DumpDefStmt(); + defStmt = nullptr; + // const MayDefNode *mayDef = vst.GetMayDef(); + // ASSERT(mayDef->GetResult() == &vst, "MarkVst: wrong corresponding version st in maydef"); + // const VersionSt *verSt = mayDef->GetOpnd(); + + // auto defStmt = mayDef->GetStmt(); + // if (kOpcodeInfo.IsCallAssigned(defStmt->GetOpCode())) { + // MapleVector &mustDefs = ssaTab.GetStmtMustDefNodes(*defStmt); + // for (auto &node : mustDefs) { + // if (aliasInfo->MayAlias(node.GetResult()->GetOst(), vst.GetOst())) { + // AddToWorkList(node.GetResult()); + // } + // } + // } + + // MarkStmtRequired(ToRef(defStmt), ToRef(dfBB)); + // AddToWorkList(verSt); + } else if (vst.GetDefType() == VersionSt::kMustDef) { + // LogInfo::MapleLogger() << "kMustDef\n"; + // vst.DumpDefStmt(); + const MustDefNode *mustDef = vst.GetMustDef(); + ASSERT(mustDef->GetResult() == &vst, "MarkVst: wrong corresponding version st in mustDef"); + const StmtNode *stmtNode = mustDef->GetStmt(); + defStmt = stmtNode; + // MarkStmtRequired(ToRef(mustDef->GetStmt()), ToRef(dfBB)); + defStmt->Dump(); + } else { + defStmt = nullptr; + } + this->node2def[baseNode] = defStmt; + if (this->def2nodes.find(defStmt) == this->def2nodes.end()) { + std::vector tmp; + tmp.push_back(baseNode); + this->def2nodes[defStmt] = tmp; + } else { + std::vector &tmp = this->def2nodes[defStmt]; + 
tmp.push_back(baseNode); + } + // LogInfo::MapleLogger() << "\n"; + return this->node2def[baseNode]; +} + +void MEDoNeighborOpt::filterRemovableMemAccesses() { + std::map>::iterator itr; + for (itr = this->def2nodes.begin(); itr != this->def2nodes.end(); ++itr) { + const StmtNode *defStmt = itr->first; + if (defStmt == nullptr) continue; + std::vector targetNodes = itr->second; + + // find removable nodes in targetNodes + std::vector> sortVec; + for (BaseNode *node : targetNodes) { + CHECK_FATAL((!node->IsSSANode()), "Find SSANode in NeighborOpt results."); + + sortVec.push_back(std::make_pair(node, this->node2offset[node])); + } + sort(sortVec.begin(), sortVec.end(), this->cmp); + std::pair prevTarget = sortVec[0]; + + int64_t newSize = 0; + for (int i = 1; i < sortVec.size(); i++) { + auto it = sortVec[i]; + // LogInfo::MapleLogger() << "offset2: " << it.second << " offset1: " << prevTarget.second << " size2: " << this->node2size[it.first] << "\n"; + if (it.second - prevTarget.second + this->node2size[it.first] <= 8) { + // removable + if (this->node2stmt.count(it.first) == 0) { + LogInfo::MapleLogger() << "not in this->node2stmt \n"; + it.first->Dump(); + } + StmtNode *stmtNode = dynamic_cast(this->node2stmt[it.first]); + CHECK_FATAL((stmtNode != nullptr), "Not a valid StmtNode (1)."); + + this->result.push_back(std::make_pair(stmtNode->GetSrcPos(), -1)); + AsanNodePos nodePos = {stmtNode->GetSrcPos(), this->node2directions[it.first], it.first->GetOpCode(), -1LL}; + this->results.push_back(nodePos); + + LogInfo::MapleLogger() << "removable:\n"; + it.first->Dump(); + stmtNode->Dump(); + LogInfo::MapleLogger() << "offset: " << it.second << "\n"; + LogInfo::MapleLogger() << "offset2: " << it.second << " offset1: " << prevTarget.second + << " size2: " << this->node2size[it.first] << "\n"; + newSize = it.second - prevTarget.second + this->node2size[it.first]; + } else { + LogInfo::MapleLogger() << "new check:\n"; + StmtNode *stmtNode = 
dynamic_cast(this->node2stmt[prevTarget.first]); + CHECK_FATAL((stmtNode != nullptr), "Not a valid StmtNode (2)."); + stmtNode->Dump(); + LogInfo::MapleLogger() << "new size: " << newSize << "\n"; + + this->result.push_back(std::make_pair(stmtNode->GetSrcPos(), newSize)); + AsanNodePos nodePos = {stmtNode->GetSrcPos(), this->node2directions[prevTarget.first], + prevTarget.first->GetOpCode(), newSize}; + this->results.push_back(nodePos); + prevTarget = it; + newSize = 0; + } + } + LogInfo::MapleLogger() << "new check:\n"; + StmtNode *stmtNode = dynamic_cast(this->node2stmt[prevTarget.first]); + CHECK_FATAL((stmtNode != nullptr), "Not a valid StmtNode (3)."); + stmtNode->Dump(); + LogInfo::MapleLogger() << "new size: " << newSize << "\n"; + + this->result.push_back(std::make_pair(stmtNode->GetSrcPos(), newSize)); + AsanNodePos nodePos = {stmtNode->GetSrcPos(), this->node2directions[prevTarget.first], + prevTarget.first->GetOpCode(), newSize}; + this->results.push_back(nodePos); + } +} + +void MEDoNeighborOpt::GetAnalysisDependence(maple::AnalysisDep &aDep) const { + aDep.SetPreservedAll(); +} + +std::vector MEDoNeighborOpt::GetResult() { + // DumpResult(); // for debug + return this->results; +} + +bool MEDoNeighborOpt::PreAnalysis(MeFunction &mefunc) { + // MIRBuilder *builder = mefunc.GetMIRModule().GetMIRBuilder(); + // this->func = &mefunc; + if (mefunc.GetMirFunc()->GetAttr(FUNCATTR_extern)) { + return false; + } + if (mefunc.GetName().find("__asan_") == 0 || mefunc.GetName().find("__san_cov_") == 0) { + return false; + } + + std::vector toInstrument; + std::vector noReturnCalls; + + for (BB *bb : mefunc.GetCfg()->GetAllBBs()) { + // bb could be nullptr sometime? 
+ if (bb == nullptr) continue; + for (StmtNode &stmt : bb->GetStmtNodes()) { + toInstrument.push_back(&stmt); + // result.push_back(stmt.GetSrcPos()); // TODO + if (CallNode *callNode = dynamic_cast(&stmt)) { + MIRFunction *calleeFunc = GlobalTables::GetFunctionTable().GetFunctionFromPuidx(callNode->GetPUIdx()); + if (calleeFunc->NeverReturns() || calleeFunc->GetName() == "exit") { + noReturnCalls.push_back(callNode); + } + } + } + } + + // toInstrument contains basically all statements in a function + // we want to do Neighbor Checks optimization before further instrumentation + /*/ TODO: Monkbai 6/27/2023 + 1. Collect base addresses for all indirect memory access + 2. Collect offset for all indirect memory access + 3. Find removable checks and update toInstrument + /*/ + MeCFG *cfg = mefunc.GetCfg(); + // MIRModule &mod = mefunc.GetMIRModule(); + for (BB *bb : cfg->GetAllBBs()) { + /* + LogInfo::MapleLogger() << "BB:\n"; + bb->Dump(&mod); + LogInfo::MapleLogger() << "\n"; + */ + + // for each BB, we check all stmt inside + if (bb == nullptr) continue; + NeighborOptEntry(bb); + } + + return true; +} + +bool MEDoNeighborOpt::PhaseRun(MeFunction &f) { + LogInfo::MapleLogger() << "The MEDoNeighborOpt::PhaseRun is running " << f.GetName() << "\n"; + PreAnalysis(f); + // for (auto pos : this->result){ + // LogInfo::MapleLogger() << "filenum " << pos.FileNum() << " linenum " << pos.LineNum() << " colnum " << pos.Column() << "\n"; + // } + LogInfo::MapleLogger() << "The MEDoNeighborOpt::PhaseRun ends " << f.GetName() << "\n"; + return true; +} + +} // namespace maple + +#endif \ No newline at end of file diff --git a/src/mapleall/maple_san/src/asan_phases.cpp b/src/mapleall/maple_san/src/asan_phases.cpp index cccfde11602448cebb40a698382bd7c34ec4ce4f..c00d8915c4004bcbcf317ccf300724e3ed601b50 100644 --- a/src/mapleall/maple_san/src/asan_phases.cpp +++ b/src/mapleall/maple_san/src/asan_phases.cpp @@ -2,15 +2,29 @@ #include "asan_phases.h" #include "asan_function.h" 
+#include "asan_razor.h" #include "asan_module.h" +#include "san_common.h" #include "me_cfg.h" #include "mempool.h" +#include "asan_config.h" +#include "asan_asap.h" namespace maple { void MEDoAsan::GetAnalysisDependence(maple::AnalysisDep &aDep) const { aDep.AddRequired(); + + if (ENABLE_ASAN_NEIGHBOR_OPT) { + aDep.AddRequired(); + aDep.AddRequired(); + aDep.AddRequired(); + // TODO: we need to filter results of MEDoVarCheck + aDep.AddRequired(); + aDep.AddRequired(); // to me ir + aDep.AddRequired(); // to maple ir + } aDep.SetPreservedAll(); } @@ -18,11 +32,15 @@ bool MEDoAsan::PhaseRun(maple::MeFunction &f) { // The reture value is said to show whether this phase modifies IR // The document said the return value is not used PreAnalysis *symbol_interesting = GET_ANALYSIS(MEDoVarCheck, f); + + std::vector targetPos; + if (ENABLE_ASAN_NEIGHBOR_OPT) + targetPos = GET_ANALYSIS(MEDoNeighborOpt, f); if (symbol_interesting == nullptr) { LogInfo::MapleLogger() << "The MEDoVarCheck::PhaseRun is not called " << f.GetName() << "\n"; } LogInfo::MapleLogger() << "The MEDoAsan::PhaseRun is running " << f.GetName() << "\n"; - AddressSanitizer Asan(f.GetMIRModule(), symbol_interesting); + AddressSanitizer Asan(f.GetMIRModule(), symbol_interesting, targetPos); Asan.instrumentFunction(f); return true; } @@ -52,6 +70,16 @@ bool MEDoVarCheck::PhaseRun(maple::MeFunction &f) { MIRSymbol *mirSymbol = mirFunction->GetLocalOrGlobalSymbol(addrofNode->GetStIdx()); addrOfSymList.insert(mirSymbol); } + else if (baseNode->GetOpCode() == OP_block) { + // Actually, this phase runs before other phases + // This block may never execute + // We add this here to ensure all stmtNodes are visited + BlockNode* tmpBlock = dynamic_cast(baseNode); + CHECK_NULL_FATAL(tmpBlock); + for (StmtNode &stmt : tmpBlock->GetStmtNodes()) { + baseNodeStack.push(&stmt); + } + } for (size_t j = 0; j < baseNode->NumOpnds(); j++) { baseNodeStack.push(baseNode->Opnd(j)); } @@ -85,6 +113,25 @@ PreAnalysis* 
MEDoVarCheck::GetResult() { return this->result; } +void MEASanRazor::GetAnalysisDependence(maple::AnalysisDep &aDep) const { + // aDep.AddRequired(); + aDep.SetPreservedAll(); +} + +bool MEASanRazor::PhaseRun(maple::MeFunction &f) { + ASanRazor razor; + bool changed = razor.InstrumentFunctionForProfiling(f, *AsanBlockListPtr); + if (SanRazorIs2ndCompile) { + razor.ClearSanRazorCoverageStmtNodes(); + if (ASAN_DEBUG_MODE) { + LogInfo::MapleLogger() << "ASAP Process function: " << f.GetName() << "\n"; + } + razor.RemoveRedundantInstrumentation(f, *AsanBlockListPtr); + ASAPRemoveAsanChecks(AsanModulePtr->GetSrcFilePath(), f.GetMirFunc()->GetBody()); + } + return changed; +} + } // namespace maple -#endif \ No newline at end of file +#endif diff --git a/src/mapleall/maple_san/src/asan_razor.cpp b/src/mapleall/maple_san/src/asan_razor.cpp new file mode 100644 index 0000000000000000000000000000000000000000..2867b6099d6c64de6fcb3b3747107150c09682b9 --- /dev/null +++ b/src/mapleall/maple_san/src/asan_razor.cpp @@ -0,0 +1,857 @@ +/* +This file implements SanRazor for improving efficiency +*/ +#ifdef ENABLE_MAPLE_SAN + +#include "asan_razor.h" +#include "asan_interfaces.h" +#include "asan_module.h" +#include "asan_ud.h" +#include +#include +#include + +namespace maple { + +std::string SanRazorCounterName; +std::string SanRazorSizeName; +std::string SanRazorIsInitializedName; +std::string SanRazorFilenameName; +std::atomic SanRazorSize; +std::map SanRazorCheckIDMap; +std::map SanRazorCallSiteIdMap; +std::map SanRazorStmtKeyCheckIDMap; +std::map SanRazorStmtKey2CostMap; +std::set SanRazorAsanRelevantStmtKey; +std::string IDMAP_SUFFIX = ".idmap"; +std::string SanRazorCheckIDMapFileName; +std::set AsanRelevanCheckIDs; +bool SanRazorIs2ndCompile = false; + +std::string SanRazorConcatPrefixAndFilename(std::filesystem::path prefix, std::string filename) { + std::string _name = filename; + while (_name[0] == '/') { + _name = _name.substr(1); + } + std::filesystem::path full_filename 
= prefix / _name; + return full_filename.string(); +} + +std::string GetSanrazorCounterName(ModuleAddressSanitizer& AsanModule) { + std::string postfix = AsanModule.GetModuleSymbolPostfix(); + return "__sanrazor_cov_counter_" + postfix; +} + +std::string GetSanrazorSizeName(ModuleAddressSanitizer& AsanModule) { + std::string postfix = AsanModule.GetModuleSymbolPostfix(); + return "__sanrazor_cov_size_" + postfix; +} + +std::string GetSanrazorIsInitializedName(ModuleAddressSanitizer& AsanModule) { + std::string postfix = AsanModule.GetModuleSymbolPostfix(); + return "__sanrazor_cov_is_initialized_" + postfix; +} + +std::string GetSanrazorFilenameName(ModuleAddressSanitizer& AsanModule) { + std::string postfix = AsanModule.GetModuleSymbolPostfix(); + return "__sanrazor_filename_" + postfix; +} + +std::string GetSanrazorAppendInfoCallerName(ModuleAddressSanitizer& AsanModule) { + std::string postfix = AsanModule.GetModuleSymbolPostfix(); + return "__sanrazor_append_info_" + postfix; +} + +std::string GetSanrazorAppendInfoStructVarName(ModuleAddressSanitizer& AsanModule) { + std::string postfix = AsanModule.GetModuleSymbolPostfix(); + return "__sanrazor_append_info_var_" + postfix; +} + +std::string GetSanrazorAppendInfoCallerDefFilePath(ModuleAddressSanitizer& AsanModule) { + std::string src_path = AsanModule.GetSrcFilePath(); + return src_path + "__sanrazor_append_info.c"; +} + +void AddGlobalSanrazorCounter(ModuleAddressSanitizer& AsanModule, size_t size, bool isInit) { + std::string name = GetSanrazorCounterName(AsanModule); + AsanModule.AddGlobalUIntArray(name, std::vector(size, 0), isInit); +} + +void AddGlobalSanrazorSize(ModuleAddressSanitizer& AsanModule, size_t size, bool isInit) { + std::string name = GetSanrazorSizeName(AsanModule); + AsanModule.AddGlobalUInt(name, size, isInit); +} + +void AddGlobalSanrazorIsInitialized(ModuleAddressSanitizer& AsanModule, bool isInit) { + std::string name = GetSanrazorIsInitializedName(AsanModule); + 
AsanModule.AddGlobalChar(name, 0, isInit); +} + +void AddGlobalSanrazorFilename(ModuleAddressSanitizer& AsanModule, bool isInit) { + std::string name = GetSanrazorFilenameName(AsanModule); + AsanModule.AddGlobalString(name, AsanModule.GetSrcFilePath(), isInit); +} + +void InitializeSanRazorGlobalNames(ModuleAddressSanitizer& AsanModule) { + SanRazorCounterName = GetSanrazorCounterName(AsanModule); + SanRazorSizeName = GetSanrazorSizeName(AsanModule); + SanRazorIsInitializedName = GetSanrazorIsInitializedName(AsanModule); + SanRazorFilenameName = GetSanrazorFilenameName(AsanModule); + SanRazorSize = 0; + std::filesystem::path prefix_dir = GetSanRazorCheckIDMapDirPath(); + SanRazorCheckIDMapFileName = AsanModule.GetSrcFilePath() + IDMAP_SUFFIX; + SanRazorCheckIDMapFileName = SanRazorConcatPrefixAndFilename(prefix_dir, SanRazorCheckIDMapFileName); + SanRazorIs2ndCompile = ((MeOption::asanFlags & 0x04) > 0); + // besides var names, we also create global symbols + AddGlobalSanrazorCounter(AsanModule, 1, true); + AddGlobalSanrazorSize(AsanModule, 1, true); + AddGlobalSanrazorFilename(AsanModule, true); + AddGlobalSanrazorIsInitialized(AsanModule, true); +} + +void SetSanRazorGlobals(ModuleAddressSanitizer& AsanModule) { + AddGlobalSanrazorCounter(AsanModule, SanRazorSize, false); + AddGlobalSanrazorSize(AsanModule, SanRazorSize, false); + AddGlobalSanrazorFilename(AsanModule, false); + AddGlobalSanrazorIsInitialized(AsanModule, false); +} + +void AddASanRazorSignalHandlerRegister(ModuleAddressSanitizer& AsanModule) { + MIRBuilder* builder = AsanModule.GetMIRModule()->GetMIRBuilder(); + MIRFunction *registerAsanCovFunction = + AsanModule.GetOrInsertFunction(kAsanCovRegisterName, GlobalTables::GetTypeTable().GetVoid(), {}); + MapleVector args(builder->GetCurrentFuncCodeMpAllocator()->Adapter()); + CallNode *call__asan_init = builder->CreateStmtCall(registerAsanCovFunction->GetPuidx(), args); + AsanModule.AppendStmtToGlobalInit(call__asan_init); +} + +void 
AddSanRazorAppendInfo(ModuleAddressSanitizer& AsanModule) { + MIRBuilder* moduleBuilder = AsanModule.GetMIRModule()->GetMIRBuilder(); + std::string structName = GetSanrazorAppendInfoCallerName(AsanModule); + MIRFunction* caller = moduleBuilder->GetOrCreateFunction(structName, TyIdx(PTY_void)); + caller->AllocPregTab(); + caller->AllocSymTab(); + caller->AllocLabelTab(); + caller->SetWithSrc(false); + caller->SetAttr(FuncAttrKind::FUNCATTR_section); + caller->SetAttr(FuncAttrKind::FUNCATTR_public); + caller->SetAttr(FuncAttrKind::FUNCATTR_extern); + caller->GetFuncAttrs().SetPrefixSectionName(".init_array"); + AsanModule.GetMIRModule()->AddFunction(caller); + + // The following code add StmtNode to the caller + // We need to change current function to the newly created caller first + // otherwise, there are errors in following phases + MIRBuilder* builder = caller->GetCodeMempool()->New(caller->GetModule()); + auto old_func = builder->GetCurrentFunction(); + builder->SetCurrentFunction(*caller); + MIRType *voidType = GlobalTables::GetTypeTable().GetVoid(); + // void __san_cov_append_info(uint64_t *array, char *is_initialized, char* filename, size_t size) + MIRFunction *__san_cov_append_info = getOrInsertFunction(builder, kSanRazorCovAppendInfo, voidType, {}); + MapleVector argcov(AsanModule.GetMIRModule()->GetMPAllocator().Adapter()); + GStrIdx arrayvarIdx = GlobalTables::GetStrTable().GetOrCreateStrIdxFromName(SanRazorCounterName); + MIRSymbol* arrayvar = GlobalTables::GetGlobalTables().GetGsymTable().GetSymbolFromStrIdx(arrayvarIdx); + BaseNode *arraynode = builder->CreateExprAddrof(0, *arrayvar); + argcov.emplace_back(arraynode); + GStrIdx isinitvarIdx = GlobalTables::GetStrTable().GetOrCreateStrIdxFromName(SanRazorIsInitializedName); + MIRSymbol* isinitvar = GlobalTables::GetGlobalTables().GetGsymTable().GetSymbolFromStrIdx(isinitvarIdx); + BaseNode *isinitnode = builder->CreateExprAddrof(0 ,*isinitvar); + argcov.emplace_back(isinitnode); + GStrIdx 
filenamevarIdx = GlobalTables::GetStrTable().GetOrCreateStrIdxFromName(SanRazorFilenameName); + MIRSymbol* filenamevar = GlobalTables::GetGlobalTables().GetGsymTable().GetSymbolFromStrIdx(filenamevarIdx); + BaseNode *filenamenode = builder->CreateExprDread(*filenamevar); + argcov.emplace_back(filenamenode); + GStrIdx sizevarIdx = GlobalTables::GetStrTable().GetOrCreateStrIdxFromName(SanRazorSizeName); + MIRSymbol* sizevar = GlobalTables::GetGlobalTables().GetGsymTable().GetSymbolFromStrIdx(sizevarIdx); + BaseNode *sizenode = builder->CreateExprDread(*sizevar); + argcov.emplace_back(sizenode); + // CallNode *callcov = builder->CreateStmtCall(__san_cov_append_info->GetPuidx(), argcov); + CallNode* callcov = AsanModule.GetMIRModule()->GetMemPool()->New(caller->GetCodeMemPoolAllocator(), + OP_call, __san_cov_append_info->GetPuidx()); + callcov->SetNOpnd(argcov); + callcov->SetNumOpnds(argcov.size()); + BlockNode* block = caller->GetCodeMemPool()->New(); + block->AddStatement(callcov); + caller->SetBody(block); + // reset back the function + builder->SetCurrentFunction(*old_func); +} + +void DumpSanRazorCheckIDMap(std::string filename) { + std::filesystem::path full_filename = filename; + if (ASAN_DEBUG_MODE) { + LogInfo::MapleLogger() << "Dumping SanRazorCheckIDMap to " << full_filename.string() << "\n"; + } + std::filesystem::create_directories(full_filename.parent_path()); + std::ofstream file; + // Change cout's buffer to file. + std::streambuf *backup = LogInfo::MapleLogger().rdbuf(); + LogInfo::MapleLogger().rdbuf(file.rdbuf()); + file.open(full_filename.string(), std::ios::out); + file << std::hex; + for (auto &pair : SanRazorCheckIDMap) { + bool isAsanRelevant = (AsanRelevanCheckIDs.find(pair.second) != AsanRelevanCheckIDs.end()); + if (isAsanRelevant) + file << pair.first << " " << pair.second << " 1\n"; + else + file << pair.first << " " << pair.second << " 0\n"; + } + // Restore cout's buffer. 
+ LogInfo::MapleLogger().rdbuf(backup); + file.close(); +} + +void LoadSanRazorCheckIDMap(std::string filename) { + std::ifstream file; + file.open(filename); + std::string line; + while (std::getline(file, line)) { + std::istringstream iss(line); + size_t callSiteId; + AsanRazorCheckID id; + // int isAsanRelevant; + iss >> std::hex >> callSiteId >> id; + SanRazorCheckIDMap[callSiteId] = id; + SanRazorCallSiteIdMap[id] = callSiteId; + } + file.close(); +} + +void LoadSanRazorStmtKeyCheckIDMap(std::string filename) { + CHECK_FATAL((ASAN_RAZOR_IGNORE_IDMAP_FILE || std::filesystem::exists(filename)), \ + "%s does not exist\nExit.", filename.c_str()); + std::ifstream file; + file.open(filename, std::ios::in); + std::string src_path = AsanModulePtr->GetSrcFilePath(); + std::string line; + while (std::getline(file, line)) { + std::istringstream iss(line); + size_t callSiteId; + AsanRazorCheckID id; + int isAsanRelevant; + iss >> std::hex >> callSiteId >> id >> isAsanRelevant; + ASanRazorStmtKey key(callSiteId, src_path); + SanRazorStmtKeyCheckIDMap[key] = id; + // TODO: to decide cost + SanRazorStmtKey2CostMap[key] = 1.0; + if (isAsanRelevant > 0) + SanRazorAsanRelevantStmtKey.insert(key); + } + file.close(); +} + +void SanRazorClearEverything() { + SanRazorCounterName = ""; + SanRazorSizeName = ""; + SanRazorIsInitializedName = ""; + SanRazorFilenameName = ""; + SanRazorSize = 0; + SanRazorCheckIDMap.clear(); + SanRazorStmtKeyCheckIDMap.clear(); + SanRazorCheckIDMapFileName = ""; + AsanRelevanCheckIDs.clear(); +} + +std::string SanRazorLineDelimiter = ASAN_RAZOR_DEFAULT_DELIMITER; +std::map SanRazorStmtCountMap; + +void LoadSanRazorLog() { + char* file_path = std::getenv(ASAN_RAZOR_LOG_MACRO); + std::string filename; + if (file_path == nullptr) { + filename = ASAN_RAZOR_DEFAULT_LOG_FILE; + } else { + filename = std::string(file_path); + } + std::ifstream file; + file.open(filename); + if (!std::filesystem::exists(filename)) { + LogInfo::MapleLogger() << filename << " 
does not exist\nExit."; + abort(); + } + std::string line; + while (std::getline(file, line)) { + std::string src_path = line.substr(0, line.find(SanRazorLineDelimiter)); + if (src_path.size() < 1) continue; + line = line.substr(src_path.size() + SanRazorLineDelimiter.size()); + std::string id_str = line.substr(0, line.find(SanRazorLineDelimiter)); + size_t id = std::stoull(id_str, nullptr, 16); + std::string count_str = line.substr(id_str.size() + SanRazorLineDelimiter.size()); + AsanRazorCheckCount count = std::stoull(count_str, nullptr, 16); + // Save map of (id, src_path) => count + // Then we sort the map by count, ASAP will remove K most frequent checks + ASanRazorStmtKey key(id, src_path); + if (SanRazorStmtCountMap.find(key) == SanRazorStmtCountMap.end()) { + SanRazorStmtCountMap[key] = count; + } else { + SanRazorStmtCountMap[key] += count; + } + } + file.close(); +} + +std::string GetSanRazorCheckIDMapDirPath() { + char* idmap_dir = std::getenv(ASAN_RAZOR_IDMAP_DIR_MACRO); + std::string prefix_dir; + if (idmap_dir == nullptr) { + prefix_dir = ASAN_RAZOR_DEFAULT_IDMAP_DIR; + } else { + prefix_dir = std::string(idmap_dir); + } + return prefix_dir; +} + +void LoadAllSanRazorStmtKeyCheckIDMaps() { + std::filesystem::path prefix_dir = GetSanRazorCheckIDMapDirPath(); + std::set loaded_files; + for (auto &pair : SanRazorStmtCountMap) { + std::string filename = pair.first.src_path; + if (loaded_files.find(filename) == loaded_files.end()) { + loaded_files.insert(filename); + std::string map_filename = SanRazorConcatPrefixAndFilename(prefix_dir, filename + IDMAP_SUFFIX); + LoadSanRazorStmtKeyCheckIDMap(map_filename); + } + } +} + +ASanRazor::ASanRazor() {} + +bool ASanRazor::InstrumentFunctionForProfiling(MeFunction &F, const AsanVirtualBlockList &AsanBlocks) { + this->PreprocessFunctionForProfiling(F, AsanBlocks); + // Perform UD analysis here, such we can spot redundant UC as well + this->udchain.initME(F, this->asanStmtIDSet); + 
InstrumentBlockForProfiling(this->mirfunc->GetBody()); + return true; +} + +bool ASanRazor::RemoveRedundantInstrumentation(MeFunction &F, const AsanVirtualBlockList &AsanBlocks) { + //this->PreprocessFunctionForProfiling(F, AsanBlocks); + std::set toRemoveStmtNodeSet; + std::set SanRazorToRemoveStmtIDs = GetRemoveAsanID(); + if (ASAN_DEBUG_MODE){ + LogInfo::MapleLogger() << "Check to be removed "<blocks) { + StmtID first_id = asanBlock.first->GetStmtID(); + if (SanRazorToRemoveStmtIDs.find(first_id) != SanRazorToRemoveStmtIDs.end()) { + num_asan_blocks++; + StmtNode* cur = asanBlock.first; + while (cur != nullptr && cur != asanBlock.last) { + toRemoveStmtNodeSet.insert(cur); + cur = cur->GetNext(); + } + toRemoveStmtNodeSet.insert(asanBlock.last); + } + } + if (ASAN_DEBUG_MODE) { + LogInfo::MapleLogger() << "SanRazor removes " << num_asan_blocks << " asan blocks " << \ + toRemoveStmtNodeSet.size() << " StmtNodes\n"; + } + + std::vector stmtBlockList = getAllOrderedStmtNodeList(F.GetMirFunc()->GetBody()); + for (auto &pair : stmtBlockList) { + if (toRemoveStmtNodeSet.find(pair.first) != toRemoveStmtNodeSet.end()) { + pair.second->RemoveStmt(pair.first); + } + } + + return true; +} + +void ASanRazor::ClearSanRazorCoverageStmtNodes() { + for (StmtBlockNodePair &pair : asanCovStmts) { + pair.second->RemoveStmt(pair.first); + } +} + +void ASanRazor::_GetLabelIdx2LabelNodeFromBlock(BlockNode* block) { + for (auto &stmt : block->GetStmtNodes()) { + if (stmt.GetOpCode() == OP_label) { + LabelNode *label = dynamic_cast(&stmt); + CHECK_NULL_FATAL(label); + this->labelIdx2LabelNode[label->GetLabelIdx()] = label; + this->labelIdx2BlockNode[label->GetLabelIdx()] = block; + } else { + for (size_t i = 0; i < stmt.NumOpnds(); ++i) { + if (stmt.Opnd(i)->GetOpCode() == OP_block) { + BlockNode *tmpblock = dynamic_cast(stmt.Opnd(i)); + CHECK_NULL_FATAL(tmpblock); + _GetLabelIdx2LabelNodeFromBlock(tmpblock); + } + } + } + } +} + +std::map ASanRazor::GetLabelIdx2LabelNode() { + if 
(!this->labelIdx2LabelNode.empty()) { + return this->labelIdx2LabelNode; + } + _GetLabelIdx2LabelNodeFromBlock(this->mirfunc->GetBody()); + return this->labelIdx2LabelNode; +} + +std::map ASanRazor::GetLabelIdx2BlockNode() { + if (!this->labelIdx2BlockNode.empty()) { + return this->labelIdx2BlockNode; + } + _GetLabelIdx2LabelNodeFromBlock(this->mirfunc->GetBody()); + return this->labelIdx2BlockNode; +} + +void ASanRazor::_GetStmtID2node(BlockNode* block) { + for (auto &stmt : block->GetStmtNodes()) { + this->stmtID2node[stmt.GetStmtID()] = &stmt; + this->stmtID2BlockNode[stmt.GetStmtID()] = block; + for (size_t i = 0; i < stmt.NumOpnds(); ++i) { + if (stmt.Opnd(i)->GetOpCode() == OP_block) { + BlockNode *tmpblock = dynamic_cast(stmt.Opnd(i)); + CHECK_NULL_FATAL(tmpblock); + _GetStmtID2node(tmpblock); + } + } + } +} + +std::map ASanRazor::GetStmtID2node() { + if (!this->stmtID2node.empty()) { + return this->stmtID2node; + } + _GetStmtID2node(this->mirfunc->GetBody()); + return stmtID2node; +} + +AsanVirtualBlock* ASanRazor::GetAsanVirtualBlockWithIDBlock(const AsanVirtualIDBlock &idblock) { + // check if the block in this function + StmtNode* first = this->stmtID2node[idblock.first]; + if (first == nullptr) { + return nullptr; + } + AsanVirtualBlock* block = this->module->GetMemPool()->New(); + block->first = first; + block->last = this->stmtID2node[idblock.last]; + block->normal = this->stmtID2node[idblock.normal]; + if (block->normal == nullptr) { + block->normal = block->last->GetNext(); + } + for (auto crash_id : idblock.crashes) { + block->crashes.push_back(this->stmtID2node[crash_id]); + } + return block; +} + +std::set ASanRazor::GetAsanStmtIDSet(const AsanVirtualBlockList &AsanBlocks) { + if (!asanStmtIDSet.empty()) { + return asanStmtIDSet; + } + for (auto &idblock : *(AsanBlocks.idblocks)) { + AsanVirtualBlock* block = GetAsanVirtualBlockWithIDBlock(idblock); + if (block == nullptr) continue; + StmtNode *cur = block->first; + StmtNode *last = block->last; 
+ CHECK_NULL_FATAL(cur); + CHECK_NULL_FATAL(last); + while (cur != last) { + asanStmtIDSet.insert(cur->GetStmtID()); + stmtID2asanBlock[cur->GetStmtID()] = block; + cur = cur->GetNext(); + } + asanStmtIDSet.insert(idblock.last); + stmtID2asanBlock[idblock.last] = block; + } + return asanStmtIDSet; +} + +CallNode *ASanRazor::GetCallAsanCovStmt(AsanRazorCheckID id) { + // Store mapping first + size_t callSiteId = SanRazorSize++; + if (!SanRazorIs2ndCompile) { + SanRazorCheckIDMap[callSiteId] = id; + } + else { + // make sure the given sanrazor log file and the idmap file are consistent + // SPEC2017 removes the build directory before rebuilding + // resulting in all idmap files are deleted + if (ASAN_RAZOR_IGNORE_IDMAP_FILE) { + SanRazorCheckIDMap[callSiteId] = id; + ASanRazorStmtKey key(callSiteId, this->srcpath); + SanRazorStmtKeyCheckIDMap[key] = id; + } else { + CHECK_FATAL(SanRazorCheckIDMap[callSiteId] == id, "SanRazorCheckIDMap[%x] != %x", callSiteId, id); + } + } + // Build call node + MIRBuilder *builder = module->GetMIRBuilder(); + MIRType *voidType = GlobalTables::GetTypeTable().GetVoid(); + // void __san_cov_trace_pc(uint64_t *array, uint32_t stmt_id, char *is_initialized, char* filename, size_t size) + if (ASAN_RAZOR_TRACE_FUNC == 1) { + MIRFunction *__san_cov_trace_pc = getOrInsertFunction(builder, kSanRazorCov, voidType, {}); + MapleVector argcov(this->module->GetMPAllocator().Adapter()); + GStrIdx arrayvarIdx = GlobalTables::GetStrTable().GetOrCreateStrIdxFromName(SanRazorCounterName); + MIRSymbol* arrayvar = GlobalTables::GetGlobalTables().GetGsymTable().GetSymbolFromStrIdx(arrayvarIdx); + // BaseNode *arraynode = builder->CreateExprDread(*arrayvar); + BaseNode *arraynode = builder->CreateExprAddrof(0, *arrayvar); + argcov.emplace_back(arraynode); + argcov.emplace_back(builder->GetConstUInt64(callSiteId)); + GStrIdx isinitvarIdx = GlobalTables::GetStrTable().GetOrCreateStrIdxFromName(SanRazorIsInitializedName); + MIRSymbol* isinitvar = 
GlobalTables::GetGlobalTables().GetGsymTable().GetSymbolFromStrIdx(isinitvarIdx); + BaseNode *isinitnode = builder->CreateExprAddrof(0 ,*isinitvar); + argcov.emplace_back(isinitnode); + GStrIdx filenamevarIdx = GlobalTables::GetStrTable().GetOrCreateStrIdxFromName(SanRazorFilenameName); + MIRSymbol* filenamevar = GlobalTables::GetGlobalTables().GetGsymTable().GetSymbolFromStrIdx(filenamevarIdx); + BaseNode *filenamenode = builder->CreateExprDread(*filenamevar); + argcov.emplace_back(filenamenode); + GStrIdx sizevarIdx = GlobalTables::GetStrTable().GetOrCreateStrIdxFromName(SanRazorSizeName); + MIRSymbol* sizevar = GlobalTables::GetGlobalTables().GetGsymTable().GetSymbolFromStrIdx(sizevarIdx); + BaseNode *sizenode = builder->CreateExprDread(*sizevar); + argcov.emplace_back(sizenode); + CallNode *callcov = builder->CreateStmtCall(__san_cov_trace_pc->GetPuidx(), argcov); + return callcov; + } else if (ASAN_RAZOR_TRACE_FUNC == 2) { + MIRFunction *__san_cov_trace_pc = getOrInsertFunction(builder, kSanRazorCov2, voidType, {}); + MapleVector argcov(this->module->GetMPAllocator().Adapter()); + GStrIdx arrayvarIdx = GlobalTables::GetStrTable().GetOrCreateStrIdxFromName(SanRazorCounterName); + MIRSymbol* arrayvar = GlobalTables::GetGlobalTables().GetGsymTable().GetSymbolFromStrIdx(arrayvarIdx); + BaseNode *arraynode = builder->CreateExprAddrof(0, *arrayvar); + argcov.emplace_back(arraynode); + argcov.emplace_back(builder->GetConstUInt64(callSiteId)); + CallNode *callcov = builder->CreateStmtCall(__san_cov_trace_pc->GetPuidx(), argcov); + return callcov; + } else { + CHECK_FATAL(false, "Unknow type of trace function (%d)", ASAN_RAZOR_TRACE_FUNC); + } + +} + +CallNode *ASanRazor::GetCallDumpAsanCovStmt() { + MIRBuilder *builder = module->GetMIRBuilder(); + MIRType *voidType = GlobalTables::GetTypeTable().GetVoid(); + MIRFunction *__san_cov_trace_pc = getOrInsertFunction(builder, kSanRazorCovDump, voidType, {}); + MapleVector argcov(this->module->GetMPAllocator().Adapter()); + 
ConststrNode *conststr = this->module->GetMemPool()->New(this->srcpathidx); + conststr->SetPrimType(PTY_a64); + CallNode *callcov = builder->CreateStmtCall(__san_cov_trace_pc->GetPuidx(), argcov); + return callcov; +} + +void ASanRazor::InstrumentAsanCheckForProfiling(StmtNode *stmt, const AsanVirtualBlock &asanBlock, BlockNode *block) { + CHECK_NULL_FATAL(stmt); + CHECK_NULL_FATAL(asanBlock.first); + CHECK_NULL_FATAL(asanBlock.last); + CHECK_NULL_FATAL(asanBlock.normal); + CHECK_FATAL(asanBlock.crashes.size() > 0, "crash branch should not be empty"); + // The first stmtid is the stmtid for identifying the asan check + AsanRazorCheckID normal_id = GetTrueBranchID(stmt); + AsanRazorCheckID crash_id = GetFalseBranchID(stmt); + // Insert call __san_cov_trace_pc + CallNode *normal_call = GetCallAsanCovStmt(normal_id); + AsanRelevanCheckIDs.insert(normal_id); + // We do not use the normal pointer since two asan blocks can be continuous + // block->InsertBefore(asanBlock.normal, normal_call); + block->InsertAfter(asanBlock.last, normal_call); + asanCovStmts.push_back(StmtBlockNodePair(normal_call, block)); + if (!asanBlock.crashes.empty()) { + AsanRelevanCheckIDs.insert(crash_id); + } + for (auto crash_branch : asanBlock.crashes) { + // before asan report and crash + // we dump the coverage info + CallNode *crash_call = GetCallAsanCovStmt(crash_id); + CallNode *dump_call = GetCallDumpAsanCovStmt(); + block->InsertBefore(crash_branch, crash_call); + block->InsertBefore(crash_branch, dump_call); + asanCovStmts.push_back(StmtBlockNodePair(crash_call, block)); + asanCovStmts.push_back(StmtBlockNodePair(dump_call, block)); + } +} + +void ASanRazor::InstrumentIfStmtForProfiling(IfStmtNode *ifStmt) { + CHECK_NULL_FATAL(ifStmt); + if (!ASAN_RAZOR_INSTRUMENT_ASAN_CHECKS_ONLY + &&!this->udchain.UnmatchUserCheck(ifStmt->GetStmtID(), asanStmtIDSet) + ) { + AsanRazorCheckID true_id = GetTrueBranchID(ifStmt); + AsanRazorCheckID false_id = GetFalseBranchID(ifStmt); + + CallNode 
*true_call = GetCallAsanCovStmt(true_id); + CallNode *false_call = GetCallAsanCovStmt(false_id); + ifStmt->GetThenPart()->InsertFirst(true_call); + ifStmt->GetElsePart()->InsertFirst(false_call); + asanCovStmts.push_back(StmtBlockNodePair(true_call, ifStmt->GetThenPart())); + asanCovStmts.push_back(StmtBlockNodePair(false_call, ifStmt->GetElsePart())); + } + InstrumentBlockForProfiling(ifStmt->GetThenPart()); + InstrumentBlockForProfiling(ifStmt->GetElsePart()); +} + +void ASanRazor::InstrumentWhileStmtForProfiling(WhileStmtNode *whileStmt, BlockNode *block) { + CHECK_NULL_FATAL(whileStmt); + if (!ASAN_RAZOR_INSTRUMENT_ASAN_CHECKS_ONLY + &&!this->udchain.UnmatchUserCheck(whileStmt->GetStmtID(), asanStmtIDSet) + ) { + AsanRazorCheckID true_id = GetTrueBranchID(whileStmt); + AsanRazorCheckID false_id = GetFalseBranchID(whileStmt); + + CallNode *true_call = GetCallAsanCovStmt(true_id); + CallNode *false_call = GetCallAsanCovStmt(false_id); + whileStmt->GetBody()->InsertFirst(true_call); + block->InsertAfter(whileStmt, false_call); + asanCovStmts.push_back(StmtBlockNodePair(true_call, whileStmt->GetBody())); + asanCovStmts.push_back(StmtBlockNodePair(false_call, block)); + } + InstrumentBlockForProfiling(whileStmt->GetBody()); +} + +void ASanRazor::InstrumentDoloopForProfiling(DoloopNode *doloop, BlockNode *block) { + CHECK_NULL_FATAL(doloop); + if (!ASAN_RAZOR_INSTRUMENT_ASAN_CHECKS_ONLY + &&!this->udchain.UnmatchUserCheck(doloop->GetStmtID(), asanStmtIDSet) + ) { + AsanRazorCheckID true_id = GetTrueBranchID(doloop); + AsanRazorCheckID false_id = GetFalseBranchID(doloop); + + CallNode *true_call = GetCallAsanCovStmt(true_id); + CallNode *false_call = GetCallAsanCovStmt(false_id); + doloop->GetDoBody()->InsertFirst(true_call); + block->InsertAfter(doloop, false_call); + asanCovStmts.push_back(StmtBlockNodePair(true_call, doloop->GetDoBody())); + asanCovStmts.push_back(StmtBlockNodePair(false_call, block)); + } + InstrumentBlockForProfiling(doloop->GetDoBody()); +} + 
+// Instruments a foreachelem loop for branch profiling. Unless only ASan checks
+// are being instrumented, or udchain.UnmatchUserCheck() returns true for the
+// statement, a coverage call for the true-branch ID is inserted first in the
+// loop body and one for the false-branch ID right after the loop in `block`.
+// The loop body is always instrumented recursively.
+void ASanRazor::InstrumentForeachelemForProfiling(ForeachelemNode *foreachelem, BlockNode *block) {
+  CHECK_NULL_FATAL(foreachelem);
+  if (!ASAN_RAZOR_INSTRUMENT_ASAN_CHECKS_ONLY
+      &&!this->udchain.UnmatchUserCheck(foreachelem->GetStmtID(), asanStmtIDSet)
+  ) {
+    AsanRazorCheckID true_id = GetTrueBranchID(foreachelem);
+    AsanRazorCheckID false_id = GetFalseBranchID(foreachelem);
+
+    CallNode *true_call = GetCallAsanCovStmt(true_id);
+    CallNode *false_call = GetCallAsanCovStmt(false_id);
+    foreachelem->GetLoopBody()->InsertFirst(true_call);
+    block->InsertAfter(foreachelem, false_call);
+    asanCovStmts.push_back(StmtBlockNodePair(true_call, foreachelem->GetLoopBody()));
+    asanCovStmts.push_back(StmtBlockNodePair(false_call, block));
+  }
+  InstrumentBlockForProfiling(foreachelem->GetLoopBody());
+}
+
+// Instruments a conditional branch for profiling.
+//   condGoto: the brtrue/brfalse statement; its offset is the target LabelIdx.
+//   block:    the block that contains condGoto.
+//   isTrue:   true for brtrue (the jump is the "true" outcome), false for
+//             brfalse (the jump is the "false" outcome).
+// The fall-through outcome is counted by a call placed right after condGoto.
+// The jump outcome is counted by a call placed behind a fresh label in front
+// of the old target label; condGoto is retargeted to that fresh label, and a
+// goto to the old label shields the new call from fall-through execution.
+void ASanRazor::InstrumentCondGotoForProfiling(CondGotoNode *condGoto, BlockNode *block, bool isTrue) {
+  CHECK_NULL_FATAL(condGoto);
+  if (ASAN_RAZOR_INSTRUMENT_ASAN_CHECKS_ONLY) return;
+  if (this->udchain.UnmatchUserCheck(condGoto->GetStmtID(), asanStmtIDSet)) return;
+  // the offset is used as LabelIdx
+  LabelIdx labelIdx = condGoto->GetOffset();
+  LabelNode *label = this->labelIdx2LabelNode[labelIdx];
+  BlockNode* labelBlock = this->labelIdx2BlockNode[labelIdx];
+  CHECK_NULL_FATAL(label);
+  // labelBlock is dereferenced in both branches below, so it needs the same
+  // guard as label.
+  CHECK_NULL_FATAL(labelBlock);
+  AsanRazorCheckID true_id = GetTrueBranchID(condGoto);
+  AsanRazorCheckID false_id = GetFalseBranchID(condGoto);
+
+  CallNode *true_call = GetCallAsanCovStmt(true_id);
+  CallNode *false_call = GetCallAsanCovStmt(false_id);
+  if (isTrue) {
+    // if the condition is satisfied, we jump to the label
+    // we add a GotoNode to jump to the instruction next to the label
+    // so that the inserted call will only be executed when
+    // we jump to its label after the brtrue
+    /*
+      brtrue L1 (condition)
+      false_call
+      ...
+      goto oldLabel
+      L1: true_call
+      OldLabel: ...
+    */
+    block->InsertAfter(condGoto, false_call);
+    LabelIdx true_call_labelIdx = module->CurFunction()->GetLabelTab()->CreateLabel();
+    LabelNode *true_call_label = module->GetMIRBuilder()->CreateStmtLabel(true_call_labelIdx);
+    GotoNode* toOldLabel = module->GetMIRBuilder()->CreateStmtGoto(OP_goto, labelIdx);
+    condGoto->SetOffset(true_call_labelIdx);
+    labelBlock->InsertBefore(label, toOldLabel);
+    labelBlock->InsertBefore(label, true_call_label);
+    labelBlock->InsertBefore(label, true_call);
+    asanCovStmts.push_back(StmtBlockNodePair(false_call, block));
+    asanCovStmts.push_back(StmtBlockNodePair(toOldLabel, labelBlock));
+    // NOTE: since we change the labelIdx of condGoto, the true_call_label is not removed
+    // asanCovStmts.push_back(StmtBlockNodePair(true_call_label, labelBlock));
+    asanCovStmts.push_back(StmtBlockNodePair(true_call, labelBlock));
+  }
+  else {
+    block->InsertAfter(condGoto, true_call);
+    LabelIdx false_call_labelIdx = module->CurFunction()->GetLabelTab()->CreateLabel();
+    LabelNode *false_call_label = module->GetMIRBuilder()->CreateStmtLabel(false_call_labelIdx);
+    GotoNode* toOldLabel = module->GetMIRBuilder()->CreateStmtGoto(OP_goto, labelIdx);
+    condGoto->SetOffset(false_call_labelIdx);
+    labelBlock->InsertBefore(label, toOldLabel);
+    labelBlock->InsertBefore(label, false_call_label);
+    labelBlock->InsertBefore(label, false_call);
+    asanCovStmts.push_back(StmtBlockNodePair(true_call, block));
+    asanCovStmts.push_back(StmtBlockNodePair(toOldLabel, labelBlock));
+    // NOTE: since we change the labelIdx of condGoto, the false_call_label is not removed
+    // asanCovStmts.push_back(StmtBlockNodePair(false_call_label, labelBlock));
+    asanCovStmts.push_back(StmtBlockNodePair(false_call, labelBlock));
+  }
+}
+
+// Instruments every statement of `block` for profiling and returns true when
+// at least one statement was handled.
+//  - A statement whose ID is in asanStmtIDSet starts an ASan check: it is
+//    passed to InstrumentAsanCheckForProfiling, its ID is recorded in
+//    SanCheckStmtID, and the following statements up to and including the
+//    check's `last` statement are skipped.
+//  - Nested blocks, if/while/doloop/foreachelem and brtrue/brfalse are passed
+//    to their dedicated instrumenters and their IDs are recorded in
+//    UserCheckStmtID.
+// NOTE: the recording into UserCheckStmtID used to sit inside the switch body
+// after the default label's break, where it could never execute. It now runs
+// once per handled statement, so the user-check matching in GetRemoveAsanID
+// sees these IDs.
+bool ASanRazor::InstrumentBlockForProfiling(BlockNode *block) {
+  bool changed = false;
+  // Work on a snapshot: the instrumenters insert statements into `block`
+  // while we walk it.
+  MapleVector<StmtNode *> origin_stmts(this->module->GetMPAllocator().Adapter());
+  for (StmtNode &stmt : block->GetStmtNodes()) {
+    origin_stmts.emplace_back(&stmt);
+  }
+  bool in_skip_asan = false;
+  StmtNode *asan_last_stmt = nullptr;
+  for (StmtNode *stmt : origin_stmts) {
+    if (in_skip_asan) {
+      if (stmt == asan_last_stmt) {
+        // reach the end of asan check
+        in_skip_asan = false;
+        asan_last_stmt = nullptr;
+      }
+      continue;
+    }
+    if (asanStmtIDSet.find(stmt->GetStmtID()) != asanStmtIDSet.end()) {
+      // this is an asan check stmt
+      const AsanVirtualBlock *asanBlock = stmtID2asanBlock[stmt->GetStmtID()];
+      CHECK_NULL_FATAL(asanBlock);
+      InstrumentAsanCheckForProfiling(stmt, *asanBlock, block);
+      in_skip_asan = true;
+      asan_last_stmt = asanBlock->last;
+      changed = true;
+      SanCheckStmtID.insert(stmt->GetStmtID());
+      continue;
+    }
+    // Tracks whether THIS statement was handled; `changed` is cumulative and
+    // must not be used to decide what goes into UserCheckStmtID.
+    bool instrumented = true;
+    switch (stmt->GetOpCode()) {
+      case OP_block: {
+        InstrumentBlockForProfiling(dynamic_cast<BlockNode *>(stmt));
+        break;
+      }
+      case OP_if: {
+        InstrumentIfStmtForProfiling(dynamic_cast<IfStmtNode *>(stmt));
+        break;
+      }
+      case OP_while: {
+        InstrumentWhileStmtForProfiling(dynamic_cast<WhileStmtNode *>(stmt), block);
+        break;
+      }
+      case OP_doloop: {
+        InstrumentDoloopForProfiling(dynamic_cast<DoloopNode *>(stmt), block);
+        break;
+      }
+      case OP_foreachelem: {
+        InstrumentForeachelemForProfiling(dynamic_cast<ForeachelemNode *>(stmt), block);
+        break;
+      }
+      case OP_brtrue: {
+        InstrumentCondGotoForProfiling(dynamic_cast<CondGotoNode *>(stmt), block, true);
+        break;
+      }
+      case OP_brfalse: {
+        InstrumentCondGotoForProfiling(dynamic_cast<CondGotoNode *>(stmt), block, false);
+        break;
+      }
+      default: {
+        instrumented = false;
+        break;
+      }
+    }
+    if (instrumented) {
+      changed = true;
+      UserCheckStmtID.insert(stmt->GetStmtID());
+    }
+  }
+  return changed;
+}
+
+std::set ASanRazor::GetRemoveAsanID() {
+  // if sc sc matched, we kill the large ID one
+  // By heurstics, this is later one
+  // if uc sc matched, we kill the sc
+  std::set 
removeStmtID; + std::map found_SC; + // asanStmtIDSet + for (auto &stmtid : SanCheckStmtID){ + if (this->udchain.dassignID2dep.find(stmtid) != this->udchain.dassignID2dep.end()){ + found_SC[stmtid] = &this->udchain.dassignID2dep[stmtid]; + } + } + float sc_counter = 0 ; + float sc_tot = found_SC.size(); + for (const auto& pair0 : found_SC){ + if (removeStmtID.count(pair0.first) != 0) continue; + san_struct struct0 = Get_san_struct(pair0.first); + for (const auto& pair1 : found_SC){ + if (removeStmtID.count(pair1.first) != 0) continue; + if (pair0.first == pair1.first) continue; + san_struct struct1 = Get_san_struct(pair1.first); + if (this->udchain.sat_check(*pair0.second, *pair1.second)){ + if (this->udchain.dynamic_sat(struct0, struct1, true)){ + removeStmtID.insert(pair0.first); + sc_counter += 1; + } + } + } + } + float uc_counter = 0; + float uc_tot = UserCheckStmtID.size(); + for (const auto& UcStmtID : UserCheckStmtID) { + set_check * ptr = this->udchain.GetSetCheck(UcStmtID); + if (ptr == nullptr) continue; + san_struct UcStruct = Get_san_struct(UcStmtID); + for (const auto& pair : found_SC) { + if (removeStmtID.count(pair.first) != 0) continue; + if (pair.first == UcStmtID) continue; + san_struct ScStruct = Get_san_struct(pair.first); + if (this->udchain.sat_check(*pair.second, *ptr)) { + if (this->udchain.dynamic_sat(ScStruct, UcStruct, false)){ + removeStmtID.insert(pair.first); + uc_counter += 1; + } + } + } + } + if (ASAN_DEBUG_MODE){ + LogInfo::MapleLogger() << "ASAN-ASAN pair Matched: " << sc_counter << " / " << ((sc_tot*sc_tot) - sc_tot) /2.0 << "\n"; + LogInfo::MapleLogger() << "User Check ASAN Check Matched: " << uc_counter<< " / " << ((uc_tot*sc_tot)) << "\n"; + LogInfo::MapleLogger() << "Number of ASAN Checks can be removed: " << removeStmtID.size() << " / "< Shadow } } +/* Builds one iassign that stores constant 0 through a pointer to a `bytes`-wide unsigned type (8, 4, 2 or 1; any other width hits CHECK_FATAL_FALSE) at address ShadowBase + offset, then links it relative to InsBefore with InsertAfterThis. NOTE(review): presumably this places the store in front of InsBefore - confirm against StmtNode::InsertAfterThis. */ +void FunctionStackPoisoner::CleanStackShadowBytes(uint32 bytes, uint64 offset, MIRBuilder *mirBuilder, + BaseNode* ShadowBase, StmtNode *InsBefore) { + PrimType ptype; + switch 
(bytes) + { + case 8: + ptype = PTY_u64; + break; + case 4: + ptype = PTY_u32; + break; + case 2: + ptype = PTY_u16; + break; + case 1: + ptype = PTY_u8; + break; + default: + CHECK_FATAL_FALSE("Not supported yet."); + } + BinaryNode *Ptr = mirBuilder->CreateExprBinary(OP_add, *IntptrTy, ShadowBase, + mirBuilder->CreateIntConst(offset, IntptrTy->GetPrimType())); + MIRType* ptrType = GlobalTables::GetTypeTable().GetOrCreatePointerType( + GlobalTables::GetTypeTable().GetPrimType(ptype)->GetTypeIndex()); + IassignNode *iassignNode = mirBuilder->CreateStmtIassign(*ptrType, 0, Ptr, + mirBuilder->CreateIntConst(0, PTY_u64)); + iassignNode->InsertAfterThis(*InsBefore); +} + +/* Emits zeroing stores for the shadow bytes described by ShadowMask. The mask is scanned in chunks of LargestStoreSizeInBytes (the smaller of 8 and ASan.LongSize / 8), followed by at most one 4-byte, one 2-byte and one 1-byte tail chunk; CleanStackShadowBytes is called only for chunks that contain at least one non-zero mask byte. The local macro ASAN_CHECK_NON_ZERO sets non_zero for the chunk starting at _i with length _size. */ +void FunctionStackPoisoner::CleanStackShadow(const std::vector ShadowMask, MIRBuilder *mirBuilder, + BaseNode *ShadowBase, StmtNode *InsBefore) { + const size_t LargestStoreSizeInBytes = std::min(sizeof(uint64_t), ASan.LongSize / 8); + size_t i = 0; + bool non_zero = false; + #define ASAN_CHECK_NON_ZERO(_i, _size) \ + non_zero = false; \ + for (size_t _j = 0; _j < (_size); _j++) { \ + if (ShadowMask[(_i) + _j]) { \ + non_zero = true; \ + break; \ + } \ + } + + for (; i + LargestStoreSizeInBytes <= ShadowMask.size();) { + // we treat the last range specifically + ASAN_CHECK_NON_ZERO(i, LargestStoreSizeInBytes); + if (non_zero) { + CleanStackShadowBytes(LargestStoreSizeInBytes, i, mirBuilder, ShadowBase, InsBefore); + } + i += LargestStoreSizeInBytes; + } + if (i + 4 <= ShadowMask.size()) { + ASAN_CHECK_NON_ZERO(i, 4); + if (non_zero) { + CleanStackShadowBytes(4, i, mirBuilder, ShadowBase, InsBefore); + } + i += 4; + } + if (i + 2 <= ShadowMask.size()) { + ASAN_CHECK_NON_ZERO(i, 2) + if (non_zero) { + CleanStackShadowBytes(2, i, mirBuilder, ShadowBase, InsBefore); + } + i += 2; + } + if (i + 1 <= ShadowMask.size()) { + ASAN_CHECK_NON_ZERO(i, 1) + if (non_zero) { + CleanStackShadowBytes(1, i, mirBuilder, ShadowBase, InsBefore); + } + i += 1; + } + #undef ASAN_CHECK_NON_ZERO +} + void 
FunctionStackPoisoner::initializeCallbacks(const MIRModule &M) { MIRBuilder *mirBuilder = M.GetMIRBuilder(); #ifdef ENABLERBTREE @@ -255,9 +330,11 @@ void FunctionStackPoisoner::collectLocalVariablesWithAlloca() { bool FunctionStackPoisoner::runOnFunction() { // Collect alloca, ret, etc. - for (StmtNode &stmt : mirFunction->GetBody()->GetStmtNodes()) { - if (stmt.GetOpCode() == OP_return) { - RetVec.push_back(&stmt); + std::vector allStmtBlock = getAllOrderedStmtNodeList(mirFunction->GetBody()); + for (StmtBlockNodePair &stmtBlockPair : allStmtBlock) { + StmtNode* stmtptr = stmtBlockPair.first; + if (stmtptr->GetOpCode() == OP_return) { + RetVec.push_back(stmtBlockPair); } } // Collect local variable @@ -317,14 +394,15 @@ void FunctionStackPoisoner::createDynamicAllocasInitStorage() { mirFunction->GetBody()->InsertBefore(mirFunction->GetBody()->GetFirst(), dassignNode); } -void FunctionStackPoisoner::unpoisonDynamicAllocasBeforeInst(StmtNode *InstBefore) { +void FunctionStackPoisoner::unpoisonDynamicAllocasBeforeInst(StmtBlockNodePair stmtBlockPair) { MIRBuilder *mirBuilder = module->GetMIRBuilder(); MapleVector args(mirBuilder->GetCurrentFuncCodeMpAllocator()->Adapter()); args.emplace_back(mirBuilder->CreateDread(*DynamicAllocaLayout, IntptrTy->GetPrimType())); args.emplace_back(mirBuilder->CreateAddrof(*DynamicAllocaLayout, PTY_u64)); CallNode *callNode = mirBuilder->CreateStmtCall(AsanAllocasUnpoisonFunc->GetPuidx(), args); - callNode->InsertAfterThis(*InstBefore); + BlockNode* block = stmtBlockPair.second; + block->InsertBefore(stmtBlockPair.first, callNode); } // Unpoison dynamic allocas redzones. 
@@ -405,10 +483,10 @@ void FunctionStackPoisoner::handleDynamicAllocaCall(ASanDynaVariableDescription } MIRSymbol *FunctionStackPoisoner::createAllocaForLayout(StmtNode *insBefore, MIRBuilder *mirBuilder, - const ASanStackFrameLayout &L) { + const ASanStackFrameLayout &L, MIRSymbol** asan_tmp_ptr) { CHECK_FATAL(L.FrameSize < UINT32_MAX, "Too large frame size."); MIRArrayType *arrayType = - GlobalTables::GetTypeTable().GetOrCreateArrayType(*GlobalTables::GetTypeTable().GetInt8(), L.FrameSize); + GlobalTables::GetTypeTable().GetOrCreateArrayType(*GlobalTables::GetTypeTable().GetInt8(), uint32_t(L.FrameSize)); MIRSymbol *tmp = getOrCreateSymbol(mirBuilder, arrayType->GetTypeIndex(), "asan_tmp", kStVar, kScAuto, mirFunction, kScopeLocal); size_t realignStack = 32; @@ -422,6 +500,7 @@ MIRSymbol *FunctionStackPoisoner::createAllocaForLayout(StmtNode *insBefore, MIR DassignNode *dassignNode = mirBuilder->CreateStmtDassign(alloca->GetStIdx(), 0, mirBuilder->CreateAddrof(*tmp, PTY_u64)); mirFunction->GetBody()->InsertBefore(insBefore, dassignNode); + *asan_tmp_ptr = tmp; return alloca; } @@ -439,7 +518,12 @@ void FunctionStackPoisoner::processStackVariable() { if (stackVariableDesc.empty()) { return; } - StmtNode *insBefore = mirFunction->GetBody()->GetFirst(); + BlockNode* funcBody = mirFunction->GetBody(); + // we add an empty block at the head of the body + // so that if the first statement is being replaced + // we can still insert a instructions before the first + BlockNode* insBefore = this->module->GetMemPool()->New(); + funcBody->InsertFirst(insBefore); size_t granularity = 1ULL << Mapping.Scale; size_t minHeaderSize = std::max(ASan.LongSize / 2, granularity); const ASanStackFrameLayout &L = ComputeASanStackFrameLayout(stackVariableDesc, granularity, minHeaderSize); @@ -456,12 +540,27 @@ void FunctionStackPoisoner::processStackVariable() { doStackMalloc = (!HasNonEmptyInlineAsm) && (!HasReturnsTwiceCall) && doStackMalloc; MIRBuilder *mirBuilder = 
module->GetMIRBuilder(); - MIRSymbol *allocaValue = createAllocaForLayout(insBefore, mirBuilder, L); + MIRSymbol *asan_tmp; + MIRSymbol *allocaValue = createAllocaForLayout(insBefore, mirBuilder, L, &asan_tmp); for (size_t i = 0; i < stackVariableDesc.size(); i++) { ASanStackVariableDescription desc = stackVariableDesc.at(i); if (desc.Symbol != nullptr) { MIRSymbol *localVar = desc.Symbol; + if (localVar->IsStatic() || localVar->IsPUStatic()) { + // skip them as I do not know how to assign its initial values + continue; + } + MIRConst* localVarConst = localVar->GetKonst(); + if (localVarConst != nullptr) { + MIRType& type = localVarConst->GetType(); + if (type.IsMIRArrayType()) { + // Given an array type, we do not replace it + // following accesses should be indirect accesses, + // and they are interesting memory to be instrumented + continue; + } + } BinaryNode *addExpr = mirBuilder->CreateExprBinary(OP_add, *IntptrTy, mirBuilder->CreateExprTypeCvt(OP_cvt, IntptrTy->GetPrimType(), PTY_u64, @@ -474,7 +573,7 @@ void FunctionStackPoisoner::processStackVariable() { newLocalVar->SetSrcPosition(localVar->GetSrcPosition()); // initialize the field of the Var by dassign DassignNode *dassignNode = mirBuilder->CreateStmtDassign(newLocalVar->GetStIdx(), 0, addExpr); - dassignNode->InsertAfterThis(*insBefore); + funcBody->InsertBefore(insBefore, dassignNode); // replace the Var being referenced replaceAllUsesWith(localVar, newLocalVar); /* The stack Var could be a function call parameter @@ -483,22 +582,15 @@ void FunctionStackPoisoner::processStackVariable() { NOTE: this must be done after replaceAllUsesWith; otherwise, the initialization could be falsely replaced*/ if (isFuncCallArg(localVar)) { - // // dread asan_ PTY_a64 - // BaseNode *asanAddrExpr = mirBuilder->CreateDread(*newLocalVar, PTY_a64); - // // dread - // MIRType *argVarType = localVar->GetType(); - // BaseNode *argVarValue = mirBuilder->CreateDread(*localVar, argVarType->GetPrimType()); - // // TODO: need 
check->since we assign the whole value of an argument, the field should be 0 - // IassignNode *iassignNode = mirBuilder->CreateStmtIassign(*localVarPtr, 0, asanAddrExpr, argVarValue); - // iassignNode->InsertAfterThis(*insBefore); - // // here we call memcpy to parse the argument - // the function should be the same as the upper code - MapleVector args(module->GetMPAllocator().Adapter()); - args.emplace_back(mirBuilder->CreateDread(*newLocalVar, PTY_a64)); - args.emplace_back(mirBuilder->CreateAddrof(*localVar, PTY_u64)); - args.emplace_back(mirBuilder->GetConstUInt64(localVar->GetType()->GetSize())); - IntrinsiccallNode *intrinsiccallNode = mirBuilder->CreateStmtIntrinsicCall(INTRN_C_memcpy, args); - intrinsiccallNode->InsertAfterThis(*insBefore); + // MapleVector args(module->GetMPAllocator().Adapter()); + // args.emplace_back(mirBuilder->CreateDread(*newLocalVar, PTY_a64)); + // args.emplace_back(mirBuilder->CreateAddrof(*localVar, PTY_u64)); + // args.emplace_back(mirBuilder->GetConstUInt64(localVar->GetType()->GetSize())); + // IntrinsiccallNode *intrinsiccallNode = mirBuilder->CreateStmtIntrinsicCall(INTRN_C_memcpy, args); + // funcBody->InsertBefore(insBefore, intrinsiccallNode); + AddrofNode* addrofNode = mirBuilder->CreateAddrof(*localVar, PTY_u64); + DassignNode* dassignnode = mirBuilder->CreateStmtDassign(*newLocalVar, 0, addrofNode); + funcBody->InsertBefore(insBefore, dassignnode); } } if (desc.AllocaInst != nullptr) { @@ -512,7 +604,6 @@ void FunctionStackPoisoner::processStackVariable() { assignNode->SetRHS(addExpr); } } - insBefore = insBefore->GetNext()->GetPrev(); } // The left-most redzone has enough space for at least 4 pointers. 
@@ -520,7 +611,7 @@ void FunctionStackPoisoner::processStackVariable() { BaseNode *basePlus0 = mirBuilder->CreateDread(*allocaValue, PTY_a64); IassignNode *basePlus0Store = mirBuilder->CreateStmtIassign( *IntptrPtrTy, 0, basePlus0, mirBuilder->CreateIntConst(kCurrentStackFrameMagic, IntptrTy->GetPrimType())); - basePlus0Store->InsertAfterThis(*insBefore); + funcBody->InsertBefore(insBefore, basePlus0Store); // Write the frame description constant to redzone[1] BaseNode *basePlus1 = mirBuilder->CreateExprBinary(OP_add, *GlobalTables::GetTypeTable().GetPrimType(PTY_u64), mirBuilder->CreateDread(*allocaValue, PTY_a64), @@ -530,7 +621,7 @@ void FunctionStackPoisoner::processStackVariable() { PTY_a64, GlobalTables::GetUStrTable().GetOrCreateStrIdxFromName(descriptionString)); IassignNode *basePlus1Store = mirBuilder->CreateStmtIassign(*IntptrPtrTy, 0, basePlus1, description); - basePlus1Store->InsertAfterThis(*insBefore); + funcBody->InsertBefore(insBefore, basePlus1Store); // Write the PC to redzone[2] BaseNode *basePlus2 = mirBuilder->CreateExprBinary(OP_add, *GlobalTables::GetTypeTable().GetPrimType(PTY_u64), mirBuilder->CreateDread(*allocaValue, PTY_a64), @@ -538,7 +629,7 @@ void FunctionStackPoisoner::processStackVariable() { AddroffuncNode *addroffuncNode = mirBuilder->CreateExprAddroffunc(mirFunction->GetPuidx()); addroffuncNode->SetPrimType(PTY_a64); IassignNode *basePlus2Store = mirBuilder->CreateStmtIassign(*IntptrPtrTy, 0, basePlus2, addroffuncNode); - basePlus2Store->InsertAfterThis(*insBefore); + funcBody->InsertBefore(insBefore, basePlus2Store); const auto &shadowAfterScope = GetShadowBytesAfterScope(stackVariableDesc, L); #ifdef ENABLERBTREE @@ -575,21 +666,24 @@ void FunctionStackPoisoner::processStackVariable() { mirFunction, kScopeLocal); DassignNode *dassignNode = mirBuilder->CreateStmtDassign( *shadowBase, 0, ASan.memToShadow(mirBuilder->CreateDread(*allocaValue, PTY_a64), *mirBuilder)); - dassignNode->InsertAfterThis(*insBefore); + 
funcBody->InsertBefore(insBefore, dassignNode); copyToShadow(shadowAfterScope, shadowAfterScope, mirBuilder, mirBuilder->CreateDread(*shadowBase, shadowBase->GetType()->GetPrimType()), insBefore); - std::vector shadowClean(shadowAfterScope.size(), 0); - // (Un)poison the stack before all ret instructions. - for (StmtNode *ret : RetVec) { + for (StmtBlockNodePair &ret : RetVec) { // Mark the current frame as retired. IassignNode *retiredNode = mirBuilder->CreateStmtIassign( *IntptrPtrTy, 0, basePlus0, mirBuilder->CreateIntConst(kRetiredStackFrameMagic, IntptrTy->GetPrimType())); - retiredNode->InsertAfterThis(*ret); + ret.second->InsertBefore(ret.first, retiredNode); if (doStackMalloc) { - copyToShadow(shadowAfterScope, shadowClean, mirBuilder, - mirBuilder->CreateDread(*shadowBase, shadowBase->GetType()->GetPrimType()), ret); + CleanStackShadow(shadowAfterScope, mirBuilder, + mirBuilder->CreateDread(*shadowBase, shadowBase->GetType()->GetPrimType()), ret.first); + // The following code is used for ensuring the stack is cleaned + // BaseNode* tmpStackBase = mirBuilder->CreateAddrof(*asan_tmp, PTY_u64); + // BaseNode* tmpSizeNode = mirBuilder->CreateIntConst(L.FrameSize, PTY_u64); + // CallNode* call_stack_check = CreateStackCheck(mirBuilder, tmpStackBase, tmpSizeNode); + // ret.second->InsertBefore(ret.first, call_stack_check); } } #endif @@ -599,53 +693,106 @@ void FunctionStackPoisoner::processStackVariable() { svd.Symbol->SetIsDeleted(); } } + // remove the emptyBlock for insertion + funcBody->RemoveStmt(insBefore); } -BaseNode *FunctionStackPoisoner::GetTransformedNode(MIRSymbol *oldVar, MIRSymbol *newVar, BaseNode *baseNode) { +BaseNode* FunctionStackPoisoner::TransformAddrofNode(MIRSymbol *oldVar, MIRSymbol *newVar, AddrofNode *addrofNode) { BaseNode *retNode = nullptr; - if (baseNode->GetOpCode() == OP_addrof) { - AddrofNode *addrofNode = dynamic_cast(baseNode); - MIRSymbol *mirSymbol = mirFunction->GetLocalOrGlobalSymbol(addrofNode->GetStIdx()); - if 
(mirSymbol->GetStIdx() == oldVar->GetStIdx()) { + MIRSymbol *mirSymbol = mirFunction->GetLocalOrGlobalSymbol(addrofNode->GetStIdx()); + if (mirSymbol->GetStIdx() == oldVar->GetStIdx()) { + FieldID fieldID = addrofNode->GetFieldID(); + if (fieldID == 0) { retNode = module->GetMIRBuilder()->CreateDread(*newVar, PTY_a64); - return retNode; - } - } else if (baseNode->GetOpCode() == OP_dassign) { - DassignNode *dassignNode = dynamic_cast(baseNode); - MIRSymbol *mirSymbol = mirFunction->GetLocalOrGlobalSymbol(dassignNode->GetStIdx()); - if (mirSymbol->GetStIdx() == oldVar->GetStIdx()) { - BaseNode *newRHS = GetTransformedNode(oldVar, newVar, dassignNode->GetRHS()); - StmtNode *newStmtNode = - module->GetMIRBuilder()->CreateStmtIassign(*newVar->GetType(), dassignNode->GetFieldID(), - module->GetMIRBuilder()->CreateDread(*newVar, PTY_a64), newRHS); - retNode = newStmtNode; - return retNode; - } - } else if (baseNode->GetOpCode() == OP_dread) { - DreadNode *dreadNode = dynamic_cast(baseNode); - MIRSymbol *mirSymbol = mirFunction->GetLocalOrGlobalSymbol(dreadNode->GetStIdx()); - if (mirSymbol->GetStIdx() == oldVar->GetStIdx()) { - IreadNode *newStmtNode = module->GetMIRBuilder()->CreateExprIread( - *GlobalTables::GetTypeTable().GetPrimType(dreadNode->GetPrimType()), *newVar->GetType(), - dreadNode->GetFieldID(), module->GetMIRBuilder()->CreateDread(*newVar, PTY_a64)); - retNode = newStmtNode; - return retNode; - } - } else if (baseNode->GetOpCode() == OP_dassignoff) { - DassignoffNode *dassignoffNode = dynamic_cast(baseNode); - MIRSymbol *mirSymbol = mirFunction->GetLocalOrGlobalSymbol(dassignoffNode->GetStIdx()); - if (mirSymbol->GetStIdx() == oldVar->GetStIdx()) { - BaseNode *newRHS = GetTransformedNode(oldVar, newVar, dassignoffNode->GetRHS()); - StmtNode *newStmtNode = - module->GetMIRBuilder()->CreateStmtIassignoff(newVar->GetType()->GetPrimType(), - dassignoffNode->GetOffset(), - module->GetMIRBuilder()->CreateDread(*newVar, PTY_a64), - newRHS); - retNode = 
newStmtNode; - return retNode; + } else { + // addrof a structure field + auto type = dynamic_cast(oldVar->GetType()); + OffsetPair offsetPair = type->GetFieldOffsetFromBaseAddr(fieldID); + retNode = module->GetMIRBuilder()->CreateExprBinary(OP_add, + *GlobalTables::GetTypeTable().GetPrimType(PTY_u64), + module->GetMIRBuilder()->CreateDread(*newVar, PTY_a64), + module->GetMIRBuilder()->CreateIntConst(offsetPair.byteOffset, PTY_u64)); } } + return retNode; +} + +BaseNode* FunctionStackPoisoner::TransformDassignNode(MIRSymbol *oldVar, MIRSymbol *newVar, DassignNode *dassignNode) { + BaseNode* retNode = nullptr; + MIRSymbol *mirSymbol = mirFunction->GetLocalOrGlobalSymbol(dassignNode->GetStIdx()); + if (mirSymbol->GetStIdx() == oldVar->GetStIdx()) { + BaseNode *newRHS = GetTransformedNode(oldVar, newVar, dassignNode->GetRHS()); + StmtNode *newStmtNode = + module->GetMIRBuilder()->CreateStmtIassign(*newVar->GetType(), dassignNode->GetFieldID(), + module->GetMIRBuilder()->CreateDread(*newVar, PTY_a64), newRHS); + retNode = newStmtNode; + } + return retNode; +} + +BaseNode* FunctionStackPoisoner::TransformDreadNode(MIRSymbol *oldVar, MIRSymbol *newVar, DreadNode *dreadNode) { + BaseNode* retNode = nullptr; + MIRSymbol *mirSymbol = mirFunction->GetLocalOrGlobalSymbol(dreadNode->GetStIdx()); + if (mirSymbol->GetStIdx() == oldVar->GetStIdx()) { + IreadNode *newStmtNode = module->GetMIRBuilder()->CreateExprIread( + *GlobalTables::GetTypeTable().GetPrimType(dreadNode->GetPrimType()), *newVar->GetType(), + dreadNode->GetFieldID(), module->GetMIRBuilder()->CreateDread(*newVar, PTY_a64)); + retNode = newStmtNode; + } + return retNode; +} + +BaseNode* FunctionStackPoisoner::TransformDassignoffNode(MIRSymbol *oldVar, MIRSymbol *newVar, DassignoffNode *dassignoffNode) { + BaseNode* retNode = nullptr; + MIRSymbol *mirSymbol = mirFunction->GetLocalOrGlobalSymbol(dassignoffNode->GetStIdx()); + if (mirSymbol->GetStIdx() == oldVar->GetStIdx()) { + BaseNode *newRHS = 
GetTransformedNode(oldVar, newVar, dassignoffNode->GetRHS()); + StmtNode *newStmtNode = + module->GetMIRBuilder()->CreateStmtIassignoff(newVar->GetType()->GetPrimType(), + dassignoffNode->GetOffset(), + module->GetMIRBuilder()->CreateDread(*newVar, PTY_a64), + newRHS); + retNode = newStmtNode; + } + return retNode; +} +BaseNode *FunctionStackPoisoner::GetTransformedNode(MIRSymbol *oldVar, MIRSymbol *newVar, BaseNode *baseNode) { + BaseNode *retNode = nullptr; + switch (baseNode->GetOpCode()) { + case OP_addrof: { + AddrofNode *addrofNode = getAddrofNode(baseNode); + retNode = TransformAddrofNode(oldVar, newVar, addrofNode); + if (retNode != nullptr) return retNode; + break; + } + case OP_dassign: { + DassignNode *dassignNode = dynamic_cast(baseNode); + retNode = TransformDassignNode(oldVar, newVar, dassignNode); + if (retNode != nullptr) return retNode; + break; + } + case OP_dread: { + DreadNode *dreadNode = getDreadNode(baseNode); + retNode = TransformDreadNode(oldVar, newVar, dreadNode); + if (retNode != nullptr) return retNode; + break; + } + case OP_dassignoff: { + DassignoffNode *dassignoffNode = dynamic_cast(baseNode); + retNode = TransformDassignoffNode(oldVar, newVar, dassignoffNode); + if (retNode != nullptr) return retNode; + break; + } + case OP_block: { + BlockNode* blockNode = dynamic_cast(baseNode); + return this->replaceAllUsesOfBlockNode(oldVar, newVar, blockNode); + } + default: { + break; + } + } + // all other process must run following code to make sure every child has been visited for (size_t j = 0; j < baseNode->NumOpnds(); j++) { BaseNode *tmpNode = GetTransformedNode(oldVar, newVar, baseNode->Opnd(j)); @@ -654,30 +801,38 @@ BaseNode *FunctionStackPoisoner::GetTransformedNode(MIRSymbol *oldVar, MIRSymbol } } retNode = baseNode; - CHECK_FATAL(retNode != nullptr, "No return node."); return retNode; } -void FunctionStackPoisoner::replaceAllUsesWith(MIRSymbol *oldVar, MIRSymbol *newVar) { - if (mirFunction->GetBody() == nullptr) { - return; - } 
- CHECK_FATAL(oldVar->GetTyIdx() == dynamic_cast(newVar->GetType())->GetPointedTyIdx(), - "Replace Var SYmbol with different PointedTyIdx"); +BlockNode* FunctionStackPoisoner::replaceAllUsesOfBlockNode(MIRSymbol *oldVar, MIRSymbol *newVar, BlockNode* block) { + // block->AddStatement(StmtNode*) has side effect on the input StmtNode + // the GetPrev and GetNext will be changed automatically + // Hence, we cache the node to be replaced, then call ReplaceStmt1WithStmt2 to replace them std::vector> toReplace; - for (StmtNode &stmt : mirFunction->GetBody()->GetStmtNodes()) { - BaseNode *newStmt = GetTransformedNode(oldVar, newVar, &stmt); - if (newStmt != dynamic_cast(&stmt)) { + for (StmtNode &stmt1 : block->GetStmtNodes()) { + StmtNode* stmt1ptr = &stmt1; + BaseNode *newStmt = GetTransformedNode(oldVar, newVar, stmt1ptr); + if (newStmt != dynamic_cast(stmt1ptr)) { StmtNode *stmt2ptr = dynamic_cast(newStmt); CHECK_FATAL(stmt2ptr != nullptr, "Get a stmt2 without StmtNode type"); - stmt2ptr->SetSrcPos(stmt.GetSrcPos()); - toReplace.emplace_back(std::pair(&stmt, stmt2ptr)); + stmt2ptr->SetSrcPos(stmt1ptr->GetSrcPos()); + toReplace.push_back(std::pair(stmt1ptr, stmt2ptr)); } } for (auto ss : toReplace) { - mirFunction->GetBody()->ReplaceStmt1WithStmt2(ss.first, ss.second); + block->ReplaceStmt1WithStmt2(ss.first, ss.second); } + return block; +} + +void FunctionStackPoisoner::replaceAllUsesWith(MIRSymbol *oldVar, MIRSymbol *newVar) { + if (mirFunction->GetBody() == nullptr) { + return; + } + CHECK_FATAL(oldVar->GetTyIdx() == dynamic_cast(newVar->GetType())->GetPointedTyIdx(), + "Replace Var SYmbol with different PointedTyIdx"); + this->replaceAllUsesOfBlockNode(oldVar, newVar, mirFunction->GetBody()); } } // namespace maple diff --git a/src/mapleall/maple_san/src/asan_ud.cpp b/src/mapleall/maple_san/src/asan_ud.cpp new file mode 100644 index 0000000000000000000000000000000000000000..b6b739f316f8ceda67bb57e807f646a7edf6937c --- /dev/null +++ 
b/src/mapleall/maple_san/src/asan_ud.cpp @@ -0,0 +1,605 @@ +#ifdef ENABLE_MAPLE_SAN + +#include "asan_ud.h" + + +namespace maple { + +UDProfile::UDProfile() {} + +/* Returns a pointer to the set_check stored for stmtID. StmtID2Check is consulted first, then dassignID2dep; nullptr is returned when neither map contains the ID. */ +set_check *UDProfile::GetSetCheck(StmtID stmtID){ + // case for user check + if (this->StmtID2Check.find(stmtID) != this->StmtID2Check.end()){ + return &(this->StmtID2Check[stmtID]); + } + // for asan + if (this->dassignID2dep.find(stmtID) != this->dassignID2dep.end()){ + return &(this->dassignID2dep[stmtID]); + } + return nullptr; +} + +/* For every check in StmtID2Check, copies the check into dassignID2dep under each ID listed in its dassignID member. When two checks share a dassign ID, the one visited later overwrites the earlier entry. */ +void UDProfile::CreateDassignIDMap() { + for (auto& [stmtID, check] : this->StmtID2Check) { + for (auto& dassignID : check.dassignID) { + // Hitting this for 5 times in bzip2 + // if (this->dassignID2dep.find(dassignID) != this->dassignID2dep.end()) { + // LogInfo::MapleLogger() << "Hitting a dup in CreateDassignIDMap "<<" \n"; + // } + dassignID2dep[dassignID] = check; + } + } +} + + +/* Searches backwards from stmt (via GetPrev, one recursive call per statement) and returns the nearest earlier statement that is either a regassign to register_number, or an iassign whose address operand (Opnd(0)) is found to use that register. Returns nullptr when the start of the statement list is reached without a hit. */ +StmtNode* UDProfile::retLatest_Regassignment(StmtNode *stmt, RegID register_number) { + /* + Given a register number, return the stmt node + */ + StmtNode *ret_stmt = nullptr; + StmtNode *prevStmt = stmt->GetPrev(); + if (prevStmt != nullptr) { + if (prevStmt->GetOpCode() == OP_regassign) { + RegassignNode *regAssign = static_cast(prevStmt); + if (register_number == regAssign->GetRegIdx()) { + return prevStmt; + } else { + ret_stmt = retLatest_Regassignment(prevStmt, register_number); + } + } else if (prevStmt->GetOpCode() == OP_iassign) { + IassignNode *iassign = static_cast(prevStmt); + BaseNode *addr_expr = iassign->Opnd(0); + if (addr_expr->GetOpCode() == OP_iread) { + std::vector dump_reg; + std::vector dump_const; + recursion(addr_expr, dump_reg, dump_const); + for (auto reg_tmp : dump_reg) { + if (reg_tmp == register_number) { + return prevStmt; + } + } + ret_stmt = retLatest_Regassignment(prevStmt, register_number); + } else if (addr_expr->GetOpCode() == OP_regread) { + RegreadNode *regread = static_cast(addr_expr); + if (register_number == regread->GetRegIdx()) { + return prevStmt; 
+ } else { + ret_stmt = retLatest_Regassignment(prevStmt, register_number); + } + } else if (IsCommutative(addr_expr->GetOpCode())) { + /* + 0th stmt: add u64 ( + iread u64 <* <$_TY_IDX111>> 22 (regread ptr %177), + cvt u64 i32 (mul i32 (regread i32 %190, constval i32 2))) + */ + // We just assume its sth like register +/- sth patterns + std::vector dump_reg; + std::vector dump_const; + recursion(addr_expr->Opnd(0), dump_reg, dump_const); + for (auto reg_tmp : dump_reg) { + if (reg_tmp == register_number) { + return prevStmt; + } + } + ret_stmt = retLatest_Regassignment(prevStmt, register_number); + } else { + ret_stmt = retLatest_Regassignment(prevStmt, register_number); + } + } else { + ret_stmt = retLatest_Regassignment(prevStmt, register_number); + } + } + return ret_stmt; +} + +/* Records in dep what the right-hand side (Opnd(1)) of an iassign depends on. A plain regread is pushed onto register_live; an integer constval is appended to const_int64; for an iread, every ID that recursion() collects in dump_reg is pushed onto register_live; for any other expression those IDs are appended to register_terminal instead, so they are not expanded further. */ +void UDProfile::dep_iassign_expansion(IassignNode *iassign, set_check &dep) { + BaseNode *rhs_expr = iassign->Opnd(1); + if (rhs_expr->GetOpCode() == OP_regread) { + // Case 1. regread u32 %13 + RegreadNode *regread = static_cast(rhs_expr); + dep.register_live.push(regread->GetRegIdx()); + } else if (rhs_expr->GetOpCode() == OP_constval) { + // Case 2. constval i32 0 -> terminal + ConstvalNode *constValNode = static_cast(rhs_expr); + MIRConst *mirConst = constValNode->GetConstVal(); + if (mirConst != nullptr) { + if (mirConst->GetKind() == kConstInt) { + auto *const_to_get_value = safe_cast(mirConst); + dep.const_int64.push_back(const_to_get_value->GetValue()); + } + } + } else if (rhs_expr->GetOpCode() == OP_iread) { + // Case 3. iread agg <* <$_TY_IDX334>> 0 (regread ptr %4) -> Only hold the ptr for deref + std::vector dump_reg; + std::vector dump_const; + recursion(rhs_expr, dump_reg, dump_const); + for (VarID reg_temp : dump_reg) { + dep.register_live.push(reg_temp); + } + } else { + // Case 4. zext u32 8 (lshr u32 (regread u32 %4, constval i32 24)) + // Just assume it can be further expand and treat as a terminal... 
+ // Some of this of compound stmt are register + // assigned by callassigned or function input register + // Although there are some case didn't like this + // We can set it as terminal register to prevent recursively deref + // since it may crash + // A proper SSA likely fix this issue + std::vector dump_reg; + std::vector dump_const; + recursion(rhs_expr, dump_reg, dump_const); + for (VarID reg_temp : dump_reg) { + dep.register_terminal.push_back(reg_temp); + } + } +} + +/* Appends the value of an integer constant node to dep.const_int64. A null constant or one whose kind is not kConstInt is ignored. */ +void UDProfile::dep_constval_expansion(ConstvalNode *constValNode, set_check &dep) { + MIRConst *mirConst = constValNode->GetConstVal(); + // we only trace int64 + // We didn't handle following cases + // kConstFloatConst, MIRFloatConst + // kConstDoubleConst, MIRDoubleConst + if (mirConst != nullptr) { + if (mirConst->GetKind() == kConstInt) { + auto *const_to_get_value = safe_cast(mirConst); + dep.const_int64.push_back(const_to_get_value->GetValue()); + } + } +} + +/* Logging helper: runs sat_check on every ordered pair of distinct entries of StmtID2Check and logs the hit count halved (each unordered pair is visited twice) against the number of unordered pairs. */ +void UDProfile::GetMatched(){ + float counter = 0 ; + float tot = this->StmtID2Check.size(); + for (const auto& pair0 : this->StmtID2Check){ + for (const auto& pair1 : this->StmtID2Check){ + if (pair0.first!=pair1.first){ + if (sat_check(pair0.second, pair1.second)){ + counter += 1; + } + } + } + } + LogInfo::MapleLogger() << "Matched: " << counter/2.0 << " / " << ((tot*tot) - tot) /2.0 << "\n"; +} + +/* Expands the dependencies of a dassign by expanding its right-hand side. */ +void UDProfile::dep_dassign_expansion(DassignNode *dassign, set_check &dep) { + dep_expansion(dassign->GetRHS(), dep); +} + +/* Collects into dep what the given node depends on, dispatching on its opcode: iassign and dassign/maydassign go to their dedicated helpers (a dassign also records its statement ID in dep.dassignID), regread/dread/addrof push the register or symbol index onto the live stacks, integer and string constants are appended to the constant lists, addroffunc and dassignoff are ignored, and every other opcode is expanded operand by operand. */ +void UDProfile::dep_expansion(BaseNode *stmt, set_check &dep) { + switch (stmt->GetOpCode()) { + case OP_iassign: { + IassignNode *iassign = static_cast(stmt); + dep_iassign_expansion(iassign, dep); + break; + } + case OP_regread: { + RegreadNode *regread = static_cast(stmt); + dep.register_live.push(regread->GetRegIdx()); + break; + } + case OP_constval: { + ConstvalNode *constValNode = static_cast(stmt); + dep_constval_expansion(constValNode, dep); + break; + } + case OP_conststr: { + ConststrNode *conststr = 
static_cast(stmt); + dep.const_str.push_back(conststr->GetStrIdx()); + break; + } + case OP_conststr16: { + Conststr16Node *conststr16 = static_cast(stmt); + dep.const_str.push_back(conststr16->GetStrIdx()); + break; + } + case OP_dread: { + DreadNode *dread = static_cast(stmt); + dep.var_live.push(dread->GetStIdx().Idx()); + break; + } + case OP_addrof: { + AddrofNode *addrof = static_cast(stmt); + dep.var_live.push(addrof->GetStIdx().Idx()); + break; + } + case OP_addroffunc: { + // We don't handle function pointer + break; + } + case OP_maydassign: + case OP_dassign: { + DassignNode *dassign = static_cast(stmt); + dep.dassignID.push_back(dassign->GetStmtID()); + //LogInfo::MapleLogger() << "dassignID: " << int(dassign->GetStmtID()) << "\n"; + dep_dassign_expansion(dassign, dep); + break; + } + case OP_dassignoff: { + // TODO: + // It is not documented in MAPLE IR. + break; + } + default: { + for (size_t i = 0; i < stmt->NumOpnds(); i++) { + dep_expansion(stmt->Opnd(i), dep); + } + break; + } + } +} + +StmtNode* UDProfile::retLatest_Varassignment(StmtNode *stmt, VarID var_number) { + StmtNode *ret_stmt = nullptr; + StmtNode *prevStmt = stmt->GetPrev(); + if (prevStmt != nullptr) { + if (prevStmt->GetOpCode() == OP_dassign || prevStmt->GetOpCode() == OP_maydassign) { + DassignNode *dassign = static_cast(prevStmt); + // get the variable + if (var_number == dassign->GetStIdx().Idx()) { + // get RHS + BaseNode *rhs_expr = dassign->GetRHS(); + std::vector dump_reg; + std::vector dump_const; + recursion(rhs_expr, dump_reg, dump_const); + // check if a = a + 1 condition exist + if (std::find(dump_reg.begin(), dump_reg.end(), var_number) != dump_reg.end()){ + ret_stmt = retLatest_Varassignment(prevStmt, var_number); + } else{ + return prevStmt; + } + } else { + ret_stmt = retLatest_Varassignment(prevStmt, var_number); + } + } else if (prevStmt->GetOpCode() == OP_iassign) { + IassignNode *iassign = static_cast(prevStmt); + BaseNode *addr_expr = iassign->Opnd(0); + if 
(addr_expr->GetOpCode() == OP_dread) { + DreadNode *dread = static_cast(addr_expr); + if (var_number == dread->GetStIdx().Idx()) { + return prevStmt; + } else { + ret_stmt = retLatest_Varassignment(prevStmt, var_number); + } + } else { + ret_stmt = retLatest_Varassignment(prevStmt, var_number); + } + } else if (IsCallNode(prevStmt->GetOpCode())){ + // if is paratmeter, + // return the current + // treat function call as terminal + bool term = false; + CallNode *callstmt = static_cast(prevStmt); + if (!callstmt->GetPragmas()->empty()) { + // Same type ??? + // if the var number is in the pragma, then it is a terminal + for (auto pragmaId : *callstmt->GetPragmas()) { + if (pragmaId == var_number) { + term = true; + break; + } + } + } + if (term){ + return nullptr; + }else{ + ret_stmt = retLatest_Varassignment(prevStmt, var_number); + } + + } else if ( + prevStmt->GetOpCode() == OP_if || + prevStmt->GetOpCode() == OP_brtrue || + prevStmt->GetOpCode() == OP_brfalse || + prevStmt->GetOpCode() == OP_dowhile || + prevStmt->GetOpCode() == OP_while ){ + return nullptr; + } else { + ret_stmt = retLatest_Varassignment(prevStmt, var_number); + } + } + return ret_stmt; +} + +void UDProfile::gen_register_dep(StmtNode *stmt, set_check &br_tmp) { + while (!br_tmp.var_live.empty()) { + uint32_t var_to_check = br_tmp.var_live.top(); + auto iter = this->var_to_stmt.find(var_to_check); + br_tmp.var_live.pop(); + if (iter != this->var_to_stmt.end()) { + StmtNode *latest_stmt_tmp = retLatest_Varassignment(stmt, var_to_check); + if (latest_stmt_tmp != nullptr) { + set_check br_tmp_go_var; + dep_expansion(latest_stmt_tmp, br_tmp_go_var ); + gen_register_dep(latest_stmt_tmp, br_tmp_go_var); + br_tmp = commit(br_tmp, br_tmp_go_var); + } else{ + br_tmp.var_terminal.push_back(var_to_check); + } + } else { + br_tmp.var_terminal.push_back(var_to_check); + } + } +} + +void UDProfile::recursion(BaseNode *stmt, std::vector &stmt_var, + std::vector &stmt_const ) { + switch (stmt->GetOpCode()) { + 
//case OP_regread: { + case OP_dread: { + DreadNode *dread = static_cast(stmt); + stmt_var.push_back(dread->GetStIdx().Idx()); + break; + } + case OP_constval: { + ConstvalNode *constValNode = static_cast(stmt); + MIRConst *mirConst = constValNode->GetConstVal(); + if (mirConst != nullptr) { + if (mirConst->GetKind() == kConstInt) { + auto *const_to_get_value = safe_cast(mirConst); + stmt_const.push_back(const_to_get_value->GetValue()); + } + } + break; + } + default: { + for (size_t i = 0; i < stmt->NumOpnds(); i++) { + recursion(stmt->Opnd(i), stmt_var, stmt_const); + } + } + } +} + + +bool UDProfile::sat_check(const set_check& a, const set_check& b) { + if (compareVectors(a.var_terminal, b.var_terminal) + /* + compareVectors(a.const_int64,b.const_int64) + */ + ) { + return true; + } + return false; +} + +bool UDProfile::dynamic_sat(const san_struct& a, const san_struct& b, bool SCSC) { + // For SC-UC case, SC must be var a + bool result = false; + if (a.tot_ctr == 0) return result; + if (b.tot_ctr == 0) return result; + + if (a.tot_ctr == b.tot_ctr ) { + result = (a.false_ctr == b.false_ctr) || (a.false_ctr == b.true_ctr); + } else if (!SCSC) { + if ((a.tot_ctr == b.false_ctr || a.tot_ctr == b.true_ctr) && + (a.tot_ctr == a.false_ctr || a.tot_ctr == a.true_ctr)) { + result = true; + } + } + return result; +} + + +void UDProfile::Dump(){ + for (const auto& pair : this->StmtID2Check){ + LogInfo::MapleLogger() << "StmtID: " << pair.first ; + set_check_print_dep(pair.second); + } +} + +void UDProfile::initME(MeFunction &mefunc, + std::set asanStmtIDSet +){ + this->mefunc = &mefunc; + this->asanStmtIDSet = asanStmtIDSet; + for (StmtNode &stmt : this->mefunc->GetMirFunc()->GetBody()->GetStmtNodes()) { + GetUD(stmt); + } + // Prepare a map for dassignID to set_check + CreateDassignIDMap(); +} + +void UDProfile::GetUD(StmtNode &stmt) { + /* + - In current version, we cannot handle loops and if statements correctly + */ + // Since current ASAN is placed before lowering, 
this code can't be triggered + // // OP_regassign -> = + switch (stmt.GetOpCode()) { + case OP_regassign: { + RegassignNode *regAssign = static_cast(&stmt); + if (this->reg_to_stmt.count(regAssign->GetRegIdx()) == 0) { + this->reg_order.insert(regAssign->GetRegIdx()); + } + this->reg_to_stmt[regAssign->GetRegIdx()].push_back(&stmt); + break; + } + case OP_dassign: + case OP_maydassign: { + // TODO ... + // If is not 0, then the variable must be a structure, + // and the assignment only applies to the specified field. + DassignNode *dassign = static_cast(&stmt); + // uint32 + if (this->var_to_stmt.count(dassign->GetStIdx().Idx()) == 0) { + this->var_order.insert(dassign->GetStIdx().Idx()); + } + this->var_to_stmt[dassign->GetStIdx().Idx()].push_back(&stmt); + break; + } + // Unsupported OPCODE: + // 1. iassignoff (, ) + // 2023-02-07: I added iassignoff as interestedMemoryAccess, the address is + // calculated by ` + offset`. Hence, the instrumented code is + // simply the same as iassign + // 2. callassigned + // 2023-02-07: I added callassigned to transform the returned variables' names + // there are dassign OpCodes inside callassigned instruction + case OP_callassigned:{ + // We currently skip it, the retVar_XXX variable should not be instrumented + // A Sanrazor considered function as a terminal + /* + callassigned (, ..., ) { + dassign + dassign + ... 
+ dassign } + */ + break; + } + case OP_iassign: { + // syntax: iassign (, ) + // %addr-expr = + BaseNode *addr_expr = stmt.Opnd(0); + // addr_expr have 3 cases + // iread u64 <* <$_TY_IDX111>> 22 (regread ptr %177) + if (addr_expr->GetOpCode() == OP_iread) { + std::vector dump_var; + std::vector dump_const; + recursion(addr_expr, dump_var, dump_const); + for (VarID reg_tmp : dump_var) { + if (this->var_to_stmt.count(reg_tmp) == 0) { + this->var_order.insert(reg_tmp); + } + this->var_to_stmt[reg_tmp].push_back(&stmt); + } + } + else if (addr_expr->GetOpCode() == OP_dread) { + // dread i64 %asan_shadowBase + DreadNode *dread = static_cast(addr_expr); + if (this->var_to_stmt.count(dread->GetStIdx().Idx()) == 0) { + this->var_order.insert(dread->GetStIdx().Idx()); + } + this->var_to_stmt[dread->GetStIdx().Idx()].push_back(&stmt); + } else if (IsCommutative(addr_expr->GetOpCode())) { + std::vector dump_var; + std::vector dump_const; + recursion(addr_expr->Opnd(0), dump_var, dump_const); + for (VarID reg_tmp : dump_var) { + if (this->var_to_stmt.count(reg_tmp) == 0) { + this->var_order.insert(reg_tmp); + } + this->var_to_stmt[reg_tmp].push_back(&stmt); + } + } + break; + } + // Checks + case OP_brtrue: + case OP_brfalse:{ + set_check br_tmp; + dep_expansion(stmt.Opnd(0), br_tmp); + gen_register_dep(&stmt, br_tmp); + //auto iter = this->StmtID2Check.find(stmt.GetStmtID()); + //CHECK_FATAL(iter == this->StmtID2Check.end() , "There is a dup entry in this->StmtID2Check."); + this->StmtID2Check[stmt.GetStmtID()] = br_tmp; + break; + } + case OP_if:{ + set_check if_tmp; + dep_expansion(stmt.Opnd(0), if_tmp); + gen_register_dep(&stmt, if_tmp); + //auto iter = this->StmtID2Check.find(stmt.GetStmtID()); + //CHECK_FATAL(iter == this->StmtID2Check.end() , "There is a dup entry in this->StmtID2Check."); + this->StmtID2Check[stmt.GetStmtID()] = if_tmp; + IfStmtNode *ifNode = static_cast(&stmt); + BlockNode *ThenBlock = ifNode->GetThenPart(); + if (ThenBlock != nullptr) { + StmtNode 
*thenStmt = ThenBlock->GetFirst(); + while (thenStmt != nullptr) { + if (thenStmt->GetOpCode() != OP_comment) { + GetUD(*thenStmt); + } + thenStmt = thenStmt->GetNext(); + } + } + BlockNode *ElseBlock = ifNode->GetElsePart(); + if (ElseBlock != nullptr) { + StmtNode *elseStmt = ElseBlock->GetFirst(); + while (elseStmt != nullptr) { + if (elseStmt->GetOpCode() != OP_comment) { + GetUD(*elseStmt); + } + elseStmt = elseStmt->GetNext(); + } + } + break; + } + case OP_dowhile: + case OP_while:{ + set_check while_tmp; + dep_expansion(stmt.Opnd(0), while_tmp); + gen_register_dep(&stmt, while_tmp); + //auto iter = this->StmtID2Check.find(stmt.GetStmtID()); + //CHECK_FATAL(iter == this->StmtID2Check.end() , "There is a dup entry in this->StmtID2Check."); + this->StmtID2Check[stmt.GetStmtID()] = while_tmp; + WhileStmtNode *whileNode = static_cast(&stmt); + BlockNode *Wbody = whileNode->GetBody(); + if (Wbody != nullptr) { + StmtNode *wstmt_body = Wbody->GetFirst(); + while (wstmt_body != nullptr){ + GetUD(*(wstmt_body)); + wstmt_body = wstmt_body->GetNext(); + } + } + break; + } + + case OP_doloop:{ + set_check doloop_tmp; + dep_expansion(stmt.Opnd(1), doloop_tmp); + gen_register_dep(&stmt, doloop_tmp); + //auto iter = this->StmtID2Check.find(stmt.GetStmtID()); + //CHECK_FATAL(iter == this->StmtID2Check.end() , "There is a dup entry in this->StmtID2Check."); + this->StmtID2Check[stmt.GetStmtID()] = doloop_tmp; + + DoloopNode *doloopNode = static_cast(&stmt); + BlockNode *Dbody = doloopNode->GetDoBody(); + for(auto &StmtInBlock : Dbody->GetStmtNodes()){ + GetUD(StmtInBlock); + } + break; + } + + case OP_foreachelem:{ + ForeachelemNode *foreachelemNode = static_cast(&stmt); + // There are no comparsion checks ?? 
+ BlockNode *Fbody = foreachelemNode->GetLoopBody(); + if (Fbody != nullptr) { + StmtNode *fstmt_body = Fbody->GetFirst(); + while (fstmt_body != nullptr){ + GetUD(*(fstmt_body)); + fstmt_body = fstmt_body->GetNext(); + } + } + break; + } + + // op if while dowhile + default: { + for (size_t i = 0; i < stmt.NumOpnds(); i++) { + GetUD(static_cast(*(stmt.Opnd(i)))); + } + break; + } + } +} + + +bool UDProfile::UnmatchUserCheck(StmtID UcStmtID, std::set asanStmtIDSet){ + set_check * UcPtr = GetSetCheck(UcStmtID); + if (UcPtr == nullptr) { + // We might turn it to assertion + return true; + } + for (const auto& CheckID : asanStmtIDSet){ + set_check* ScPtr = GetSetCheck(CheckID); + if (ScPtr == nullptr) continue; + if (sat_check(*UcPtr, *ScPtr)) return false; + } + return true; +} + +} + +#endif // ENABLE_MAPLE_SAN \ No newline at end of file diff --git a/src/mapleall/maple_san/src/san_common.cpp b/src/mapleall/maple_san/src/san_common.cpp index 29e626e3da259c864e133c2478b5fed4cec9ab7c..419e08e649c01e1b693022dbaa09119ae092d5de 100644 --- a/src/mapleall/maple_san/src/san_common.cpp +++ b/src/mapleall/maple_san/src/san_common.cpp @@ -13,6 +13,37 @@ namespace maple { +std::vector getAllOrderedStmtNodeList(BlockNode* block) { + // When optimization enabled, there are BlockNode with list of StmtNode + // We recover the recursive structure into linear structure + // We could later optimize the recursive implementation with a stack + std::vector ret; + for (StmtNode &stmt : block->GetStmtNodes()) { + if (stmt.GetOpCode() == OP_block) { + BlockNode* tmpBlock = dynamic_cast(&stmt); + CHECK_FATAL((tmpBlock != nullptr), "Fail to convert stmt to BlockNode"); + std::vector tmp = getAllOrderedStmtNodeList(tmpBlock); + ret.reserve(ret.size() + tmp.size()); + ret.insert(ret.end(), tmp.begin(), tmp.end()); + } + else { + ret.push_back(StmtBlockNodePair(&stmt, block)); + for (size_t i = 0; i < stmt.NumOpnds(); ++i) { + StmtNode* tmpStmt = dynamic_cast(stmt.Opnd(i)); + if (tmpStmt == 
nullptr) continue; + if (tmpStmt->GetOpCode() == OP_block) { + BlockNode* tmpBlock = dynamic_cast(tmpStmt); + CHECK_FATAL((tmpBlock != nullptr), "Fail to convert stmt to BlockNode"); + std::vector tmp = getAllOrderedStmtNodeList(tmpBlock); + ret.reserve(ret.size() + tmp.size()); + ret.insert(ret.end(), tmp.begin(), tmp.end()); + } + } + } + } + return ret; +} + void appendToGlobalCtors(const MIRModule &mirModule, const MIRFunction *func) { MIRBuilder *mirBuilder = mirModule.GetMIRBuilder(); MIRFunction *GlobalCtors = mirBuilder->GetOrCreateFunction("__cxx_global_var_init", TyIdx(PTY_void)); @@ -229,6 +260,19 @@ MIRSymbol *getOrCreateSymbol(MIRBuilder *mirBuilder, const TyIdx tyIdx, const st return st; } +CallNode* CreateStackCheck(MIRBuilder *mirBuilder, BaseNode *baseNode, BaseNode *sizeNode) { + MIRType *voidType = GlobalTables::GetTypeTable().GetVoid(); + MIRType *intType = GlobalTables::GetTypeTable().GetUInt64(); + std::vector argTypes = {intType, intType}; + // Initialize the function + MIRFunction * check_func = getOrInsertFunction(mirBuilder, kAsanStackCheck2, voidType, argTypes); + check_func->SetAttr(FUNCATTR_extern); + MapleVector args(mirBuilder->GetCurrentFuncCodeMpAllocator()->Adapter()); + args.push_back(baseNode); + args.push_back(sizeNode); + return mirBuilder->CreateStmtCall(kAsanStackCheck2, args); +} + // Code for Sanrazor int SANRAZOR_MODE() { /* @@ -250,546 +294,6 @@ int SANRAZOR_MODE() { } } -CallNode *retCallCOV(const MeFunction &func, int bb_id, int stmt_id, int br_true, int type_of_check) { - MIRBuilder *builder = func.GetMIRModule().GetMIRBuilder(); - MIRType *voidType = GlobalTables::GetTypeTable().GetVoid(); - // void __san_cov_trace_pc(char *file_name, int bb_id, int stmt_id,int brtrue,int typecheck) - MIRFunction *__san_cov_trace_pc = getOrInsertFunction(builder, "__san_cov_trace_pc", voidType, {}); - MapleVector argcov(func.GetMIRModule().GetMPAllocator().Adapter()); - UStrIdx strIdx = 
GlobalTables::GetUStrTable().GetOrCreateStrIdxFromName(func.GetMIRModule().GetFileName()); - ConststrNode *conststr = func.GetMIRModule().GetMemPool()->New(strIdx); - conststr->SetPrimType(PTY_a64); - argcov.emplace_back(conststr); - argcov.emplace_back(builder->GetConstInt(bb_id)); - argcov.emplace_back(builder->GetConstInt(stmt_id)); - argcov.emplace_back(builder->GetConstInt(br_true)); - argcov.emplace_back(builder->GetConstInt(type_of_check)); - CallNode *callcov = builder->CreateStmtCall(__san_cov_trace_pc->GetPuidx(), argcov); - return callcov; -} - -bool isReg_redefined(BaseNode *stmt, std::vector &stmt_reg) { - switch (stmt->GetOpCode()) { - case OP_regread: { - RegreadNode *regread = static_cast(stmt); - stmt_reg.push_back(regread->GetRegIdx()); - break; - } - default: { - for (size_t i = 0; i < stmt->NumOpnds(); i++) { - isReg_redefined(stmt->Opnd(i), stmt_reg); - } - } - } - if (stmt->GetOpCode() == OP_regassign) { - RegassignNode *regAssign = static_cast(stmt); - if (std::count(stmt_reg.begin(), stmt_reg.end(), regAssign->GetRegIdx())) { - // value update - return false; - } else { - return true; - } - } - return false; -} - -template -void print_stack(std::stack &st) { - if (st.empty()) return; - T x = st.top(); - LogInfo::MapleLogger() << x << ","; - st.pop(); - print_stack(st); - st.push(x); -} - -template -bool compareVectors(const std::vector& a, const std::vector& b) { - // I am not sure why the original implementation use - // sets to compare the equivalence of two vectors (peformance?) 
- // Anyway, I think we may not delete the following code now - // if (a.size() != b.size()) - // { - // return false; - // } - // std::sort(a.begin(), a.end()); - // std::sort(b.begin(), b.end()); - // return (a == b); - std::set set_a(a.begin(), a.end()); - std::set set_b(b.begin(), b.end()); - if ((set_a.size() > 0) && (set_b.size() > 0)) { - return (set_a == set_b); - } - return false; -} - -int getIndex(std::vector v, StmtNode *K) { - auto it = find(v.begin(), v.end(), K); - // If element was found - if (it != v.end()) { - int index = it - v.begin(); - return index; - } else { - return -1; - } -} - -StmtNode *retLatest_Regassignment(StmtNode *stmt, int32 register_number) { - StmtNode *ret_stmt = nullptr; - StmtNode *prevStmt = stmt->GetPrev(); - if (prevStmt != nullptr) { - if (prevStmt->GetOpCode() == OP_regassign) { - RegassignNode *regAssign = static_cast(prevStmt); - if (register_number == regAssign->GetRegIdx()) { - return prevStmt; - } else { - ret_stmt = retLatest_Regassignment(prevStmt, register_number); - } - } else if (prevStmt->GetOpCode() == OP_iassign) { - IassignNode *iassign = static_cast(prevStmt); - BaseNode *addr_expr = iassign->Opnd(0); - if (addr_expr->GetOpCode() == OP_iread) { - std::vector dump_reg; - recursion(addr_expr, dump_reg); - for (int32 reg_tmp : dump_reg) { - if (reg_tmp == register_number) { - return prevStmt; - } - } - ret_stmt = retLatest_Regassignment(prevStmt, register_number); - } else if (addr_expr->GetOpCode() == OP_regread) { - RegreadNode *regread = static_cast(addr_expr); - if (register_number == regread->GetRegIdx()) { - return prevStmt; - } else { - ret_stmt = retLatest_Regassignment(prevStmt, register_number); - } - } else if (IsCommutative(addr_expr->GetOpCode())) { - /* - 0th stmt: add u64 ( - iread u64 <* <$_TY_IDX111>> 22 (regread ptr %177), - cvt u64 i32 (mul i32 (regread i32 %190, constval i32 2))) - */ - // We just assume its sth like register +/- sth patterns - std::vector dump_reg; - 
recursion(addr_expr->Opnd(0), dump_reg); - for (int32 reg_tmp : dump_reg) { - if (reg_tmp == register_number) { - return prevStmt; - } - } - ret_stmt = retLatest_Regassignment(prevStmt, register_number); - } else { - ret_stmt = retLatest_Regassignment(prevStmt, register_number); - } - } else { - ret_stmt = retLatest_Regassignment(prevStmt, register_number); - } - } - return ret_stmt; -} - -StmtNode *retLatest_Varassignment(StmtNode *stmt, uint32 var_number) { - StmtNode *ret_stmt = nullptr; - StmtNode *prevStmt = stmt->GetPrev(); - if (prevStmt != nullptr) { - if (prevStmt->GetOpCode() == OP_dassign || prevStmt->GetOpCode() == OP_maydassign) { - DassignNode *dassign = static_cast(prevStmt); - if (var_number == dassign->GetStIdx().Idx()) { - return prevStmt; - } else { - ret_stmt = retLatest_Varassignment(prevStmt, var_number); - } - } else if (prevStmt->GetOpCode() == OP_iassign) { - IassignNode *iassign = static_cast(prevStmt); - BaseNode *addr_expr = iassign->Opnd(0); - if (addr_expr->GetOpCode() == OP_dread) { - // dread i64 %asan_shadowBase - DreadNode *dread = static_cast(addr_expr); - if (var_number == dread->GetStIdx().Idx()) { - return prevStmt; - } else { - ret_stmt = retLatest_Varassignment(prevStmt, var_number); - } - } else { - ret_stmt = retLatest_Varassignment(prevStmt, var_number); - } - } else { - ret_stmt = retLatest_Varassignment(prevStmt, var_number); - } - } - return ret_stmt; -} - -void print_dep(set_check dep) { - LogInfo::MapleLogger() << "\nOpcode: "; - for (auto opcode_tmp : dep.opcode) { - LogInfo::MapleLogger() << int(opcode_tmp) << ","; - } - LogInfo::MapleLogger() << "\n"; -} - -std::set OP_code_blacklist{ - OP_addroffunc, OP_iaddrof, OP_addrof, OP_iread, OP_ireadoff, OP_iassign, OP_dread, OP_regread, OP_regassign, - OP_dassign, OP_maydassign, OP_iassignoff, OP_iassignfpoff, - // We only handle the SAN-SAN case - // // check with edit distance ==1 - OP_cvt, - // candidnate: - // OP_band, - // OP_zext, - // OP_ashr, - // // check with 
edit distance ==2 - // OP_add, - // OP_sub, - OP_constval, - // candidnate: - // OP_add, - // OP_ashr -}; - -std::set OP_code_re_map{OP_eq, OP_ge, OP_gt, OP_le, OP_lt, OP_ne, OP_cmp, OP_cmpl, OP_cmpg}; - -void dep_iassign_expansion(IassignNode *iassign, set_check &dep) { - BaseNode *rhs_expr = iassign->Opnd(1); - if (rhs_expr->GetOpCode() == OP_regread) { - // Case 1. regread u32 %13 - RegreadNode *regread = static_cast(rhs_expr); - dep.register_live.push(regread->GetRegIdx()); - } else if (rhs_expr->GetOpCode() == OP_constval) { - // Case 2. constval i32 0 -> terminal - ConstvalNode *constValNode = static_cast(rhs_expr); - MIRConst *mirConst = constValNode->GetConstVal(); - if (mirConst != nullptr) { - if (mirConst->GetKind() == kConstInt) { - auto *const_to_get_value = safe_cast(mirConst); - dep.const_int64.push_back(const_to_get_value->GetValue()); - } - } - } else if (rhs_expr->GetOpCode() == OP_iread) { - // Case 3. iread agg <* <$_TY_IDX334>> 0 (regread ptr %4) -> Only hold the ptr for deref - std::vector dump_reg; - recursion(rhs_expr, dump_reg); - for (int32 reg_temp : dump_reg) { - dep.register_live.push(reg_temp); - } - } else { - // Case 4. zext u32 8 (lshr u32 (regread u32 %4, constval i32 24)) - // Just assume it can be further expand and treat as a terminal... 
- // Some of this of compound stmt are register - // assigned by callassigned or function input register - // Although there are some case didn't like this - // We can set it as terminal register to prevent recursively deref - // since it may crash - // A proper SSA likely fix this issue - std::vector dump_reg; - recursion(rhs_expr, dump_reg); - for (int32 reg_temp : dump_reg) { - dep.register_terminal.push_back(reg_temp); - } - } -} - -void dep_constval_expansion(ConstvalNode *constValNode, set_check &dep) { - MIRConst *mirConst = constValNode->GetConstVal(); - // we only trace int64 - // We didn't handle following cases - // kConstFloatConst, MIRFloatConst - // kConstDoubleConst, MIRDoubleConst - if (mirConst != nullptr) { - if (mirConst->GetKind() == kConstInt) { - auto *const_to_get_value = safe_cast(mirConst); - dep.const_int64.push_back(const_to_get_value->GetValue()); - } - } -} - -void dep_dassign_expansion(DassignNode *dassign, set_check &dep, std::map> reg_to_stmt, - std::map> var_to_stmt, MeFunction func) { - std::stack san_blacklist_stack; - bool required_to_clean_san = false; - if (func.GetMIRModule().CurFunction()->GetSymbolTabSize() >= dassign->GetStIdx().Idx()) { - MIRSymbol *var = func.GetMIRModule().CurFunction()->GetSymbolTabItem(dassign->GetStIdx().Idx()); - if (var->GetName().find("asan_length") == 0) { - // dassign %asan_length 0 (band i64 (dread i64 %asan_addr, constval i64 7)) - san_blacklist_stack.push(OP_band); - san_blacklist_stack.push(OP_add); - required_to_clean_san = true; - } else if (var->GetName().find("asan_shadowValue") == 0) { - san_blacklist_stack.push(OP_ashr); - san_blacklist_stack.push(OP_add); - required_to_clean_san = true; - } - } - if (required_to_clean_san) { - if (san_blacklist_stack.size() >= dep.opcode.size()) { - for (size_t opcode_vect_i = 0; opcode_vect_i < dep.opcode.size(); ++opcode_vect_i) { - dep.opcode.pop_back(); - } - } else { - while (!san_blacklist_stack.empty()) { - bool done = false; - uint8 remove_item 
= san_blacklist_stack.top(); - san_blacklist_stack.pop(); - LogInfo::MapleLogger() << remove_item; - for (std::vector::iterator it = dep.opcode.begin(); it != dep.opcode.end(); ++it) { - if (*it == remove_item && !done) { - dep.opcode.erase(it); - done = true; - } - } - } - } - } - for (size_t i = 0; i < dassign->NumOpnds(); i++) { - dep_expansion(dassign->Opnd(i), dep, reg_to_stmt, var_to_stmt, func); - } -} - -void dep_expansion(BaseNode *stmt, set_check &dep, std::map> reg_to_stmt, - std::map> var_to_stmt, const MeFunction& func) { - if ((!OP_code_blacklist.count(stmt->GetOpCode())) && (!OP_code_re_map.count(stmt->GetOpCode()))) { - dep.opcode.push_back(stmt->GetOpCode()); - } else if (OP_code_re_map.count(stmt->GetOpCode())) { - dep.opcode.push_back(uint8(253)); - } - switch (stmt->GetOpCode()) { - case OP_iassign: { - IassignNode *iassign = static_cast(stmt); - dep_iassign_expansion(iassign, dep); - break; - } - case OP_regread: { - RegreadNode *regread = static_cast(stmt); - dep.register_live.push(regread->GetRegIdx()); - break; - } - case OP_constval: { - ConstvalNode *constValNode = static_cast(stmt); - dep_constval_expansion(constValNode, dep); - break; - } - case OP_conststr: { - ConststrNode *conststr = static_cast(stmt); - dep.const_str.push_back(conststr->GetStrIdx()); - break; - } - case OP_conststr16: { - Conststr16Node *conststr16 = static_cast(stmt); - dep.const_str.push_back(conststr16->GetStrIdx()); - break; - } - case OP_dread: { - DreadNode *dread = static_cast(stmt); - dep.var_live.push(dread->GetStIdx().Idx()); - break; - } - case OP_addrof: { - AddrofNode *addrof = static_cast(stmt); - dep.var_live.push(addrof->GetStIdx().Idx()); - break; - } - case OP_addroffunc: { - // We don't handle function pointer - break; - } - case OP_dassign: { - DassignNode *dassign = static_cast(stmt); - dep_dassign_expansion(dassign, dep, reg_to_stmt, var_to_stmt, func); - break; - } - case OP_dassignoff: { - // TODO: - // It is not documented in MAPLE IR. 
- break; - } - default: { - for (size_t i = 0; i < stmt->NumOpnds(); i++) { - dep_expansion(stmt->Opnd(i), dep, reg_to_stmt, var_to_stmt, func); - } - break; - } - } -} - -set_check commit(set_check old, set_check latest) { - old.opcode.insert(old.opcode.end(), latest.opcode.begin(), latest.opcode.end()); - old.register_terminal.insert(old.register_terminal.end(), latest.register_terminal.begin(), - latest.register_terminal.end()); - old.var_terminal.insert(old.var_terminal.end(), latest.var_terminal.begin(), latest.var_terminal.end()); - old.const_int64.insert(old.const_int64.end(), latest.const_int64.begin(), latest.const_int64.end()); - old.const_str.insert(old.const_str.end(), latest.const_str.begin(), latest.const_str.end()); - old.type_num.insert(old.type_num.end(), latest.type_num.begin(), latest.type_num.end()); - return old; -} - -bool sat_check(const set_check& a, const set_check& b) { - if (compareVectors(a.opcode, b.opcode) - /* - A strict check should also check - compareVectors(a.register_terminal,b.register_terminal) - compareVectors(a.var_terminal,b.var_terminal) - compareVectors(a.const_int64,b.const_int64) - */ - ) { - return true; - } - return false; -} - -void gen_register_dep(StmtNode *stmt, set_check &br_tmp, std::map> reg_to_stmt, - std::map> var_to_stmt, const MeFunction& func) { - while (!br_tmp.register_live.empty()) { - int32_t register_to_check = br_tmp.register_live.top(); - auto iter = reg_to_stmt.find(register_to_check); - br_tmp.register_live.pop(); - if (iter != reg_to_stmt.end()) { - StmtNode *latest_stmt_tmp = retLatest_Regassignment(stmt, register_to_check); - if (latest_stmt_tmp != nullptr) { - set_check br_tmp_go; - dep_expansion(latest_stmt_tmp, br_tmp_go, reg_to_stmt, var_to_stmt, func); - gen_register_dep(latest_stmt_tmp, br_tmp_go, reg_to_stmt, var_to_stmt, func); - br_tmp = commit(br_tmp, br_tmp_go); - } - } else { - br_tmp.register_terminal.push_back(register_to_check); - } - } - while (!br_tmp.var_live.empty()) { - 
uint32_t var_to_check = br_tmp.var_live.top(); - auto iter = var_to_stmt.find(var_to_check); - br_tmp.var_live.pop(); - if (iter != var_to_stmt.end()) { - StmtNode *latest_stmt_tmp = retLatest_Varassignment(stmt, var_to_check); - if (latest_stmt_tmp != nullptr) { - set_check br_tmp_go_var; - dep_expansion(latest_stmt_tmp, br_tmp_go_var, reg_to_stmt, var_to_stmt, func); - gen_register_dep(latest_stmt_tmp, br_tmp_go_var, reg_to_stmt, var_to_stmt, func); - br_tmp = commit(br_tmp, br_tmp_go_var); - } - } else { - br_tmp.var_terminal.push_back(var_to_check); - } - } -} - -bool isVar_redefined(BaseNode *stmt, std::vector &stmt_reg) { - switch (stmt->GetOpCode()) { - case OP_dread: { - DreadNode *dread = static_cast(stmt); - stmt_reg.push_back(dread->GetStIdx().Idx()); - break; - } - default: { - for (size_t i = 0; i < stmt->NumOpnds(); i++) { - isVar_redefined(stmt->Opnd(i), stmt_reg); - } - } - } - if (stmt->GetOpCode() == OP_dassign || stmt->GetOpCode() == OP_maydassign) { - DassignNode *dassign = static_cast(stmt); - if (std::count(stmt_reg.begin(), stmt_reg.end(), dassign->GetStIdx().Idx())) { - // value update - return false; - } else { - return true; - } - } - return false; -} - -void recursion(BaseNode *stmt, std::vector &stmt_reg) { - switch (stmt->GetOpCode()) { - case OP_regread: { - RegreadNode *regread = static_cast(stmt); - stmt_reg.push_back(regread->GetRegIdx()); - break; - } - default: { - for (size_t i = 0; i < stmt->NumOpnds(); i++) { - recursion(stmt->Opnd(i), stmt_reg); - } - } - } -} - -// stmtID to reduciable stmt ID -std::map gen_dynmatch(std::string file_name) { - // read log files and parse the stmtID with br information - FILE *fp; - auto log_name = file_name + ".log"; - // to hold the temp data - std::map ret_log_update; - - fp = fopen(("./" + log_name).c_str(), "r"); - if (fp == nullptr) { - abort(); - } - // 1. 
Parse SAN-SAN - while (true) { - int cur_id; - int rc = fscanf_s(fp, "%d", &cur_id, sizeof(cur_id)); - if (rc != 1) { - break; - } - int stmt_ID_cur = cur_id >> 1; - int br_true_tmp = (stmt_ID_cur << 1) ^ cur_id; - if (ret_log_update.count(stmt_ID_cur)) { - // L:0, R:1 - if (br_true_tmp == 1) { - ret_log_update[stmt_ID_cur].r_ctr += 1; - } else { - ret_log_update[stmt_ID_cur].l_ctr += 1; - } - ret_log_update[stmt_ID_cur].tot_ctr += 1; - } else { - san_struct tmp_san_struct; - tmp_san_struct.stmtID = stmt_ID_cur; - tmp_san_struct.tot_ctr = 1; - tmp_san_struct.l_ctr = 0; - tmp_san_struct.r_ctr = 0; - if (br_true_tmp == 1) { - tmp_san_struct.r_ctr += 1; - } else { - tmp_san_struct.l_ctr += 1; - } - ret_log_update[stmt_ID_cur] = tmp_san_struct; - } - } - fclose(fp); - return ret_log_update; -} - -bool dynamic_sat(const san_struct& a, const san_struct& b, bool SCSC) { - // For SC-UC case, SC must be var a - if (a.tot_ctr == b.tot_ctr) { - if ((a.l_ctr == b.l_ctr) || (a.l_ctr == b.r_ctr)) { - return true; - } else { - return false; - } - } else if (!SCSC) { - // true is 0 - if (a.tot_ctr == b.l_ctr) { - if ((a.tot_ctr == a.l_ctr) || (a.tot_ctr == a.r_ctr)) { - return true; - } else { - return false; - } - } else if (a.tot_ctr == b.r_ctr) { - if ((a.tot_ctr == a.l_ctr) || (a.tot_ctr == a.r_ctr)) { - return true; - } else { - return false; - } - } else { - return false; - } - } else { - return false; - } -} - } // namespace maple #endif \ No newline at end of file diff --git a/src/mapleall/maple_san/src/san_phase_manager.cpp b/src/mapleall/maple_san/src/san_phase_manager.cpp index d55f50bc8c61b2e97e72ce05449ddb1dfed6791a..6160d1464c2c9652f26f7df7a7c7ebfc039b76ca 100644 --- a/src/mapleall/maple_san/src/san_phase_manager.cpp +++ b/src/mapleall/maple_san/src/san_phase_manager.cpp @@ -3,10 +3,15 @@ #include "san_phase_manager.h" #include "asan_phases.h" #include "ubsan_phases.h" - +#include "san_common.h" +#include "asan_razor.h" +#include "asan_asap.h" namespace maple { 
+AsanVirtualBlockList *AsanBlockListPtr = nullptr; +ModuleAddressSanitizer *AsanModulePtr = nullptr; + void MEModuleDoAsan::GetAnalysisDependence(maple::AnalysisDep &aDep) const { aDep.SetPreservedAll(); } @@ -17,11 +22,20 @@ void MEModuleDoAsan::DoPhasesPopulate(const maple::MIRModule &mirModule) { #undef SAN_PHASE } +bool MEModuleDoAsan::SanRazorEnabled() const { + for (size_t i = 0; i < phasesSequence.size(); ++i) { + const MaplePhaseInfo *curPhase = MaplePhaseRegister::GetMaplePhaseRegister()->GetPhaseByID(phasesSequence[i]); + if (curPhase->PhaseName() == "doAsanRazor") return true; + } + return false; +} + bool MEModuleDoAsan::FuncLevelRun(MeFunction &meFunc, AnalysisDataManager &serialADM) { bool changed = false; for (size_t i = 0; i < phasesSequence.size(); ++i) { SolveSkipFrom(MeOption::GetSkipFromPhase(), i); const MaplePhaseInfo *curPhase = MaplePhaseRegister::GetMaplePhaseRegister()->GetPhaseByID(phasesSequence[i]); + // if (curPhase->PhaseName() == "doAsanRazor") continue; if (!IsQuiet()) { LogInfo::MapleLogger() << "---Run maple_san " << (curPhase->IsAnalysis() ? "analysis" : "transform") << " Phase [ " << curPhase->PhaseName() << " ]---\n"; @@ -36,16 +50,53 @@ bool MEModuleDoAsan::FuncLevelRun(MeFunction &meFunc, AnalysisDataManager &seria return changed; } +bool MEModuleDoAsan::FuncLevelSanRazorRun(MeFunction &meFunc, AnalysisDataManager &serialADM) { + bool changed = false; + for (size_t i = 0; i < phasesSequence.size(); ++i) { + SolveSkipFrom(MeOption::GetSkipFromPhase(), i); + const MaplePhaseInfo *curPhase = MaplePhaseRegister::GetMaplePhaseRegister()->GetPhaseByID(phasesSequence[i]); + if (curPhase->PhaseName() != "doAsanRazor") continue; + if (!IsQuiet()) { + LogInfo::MapleLogger() << "---Run SanRazor " << (curPhase->IsAnalysis() ? 
"analysis" : "transform") + << " Phase [ " << curPhase->PhaseName() << " ]---\n"; + } + if (curPhase->IsAnalysis()) { + changed |= RunAnalysisPhase(*curPhase, serialADM, meFunc); + } else { + changed |= RunTransformPhase(*curPhase, serialADM, meFunc); + } + SolveSkipAfter(MeOption::GetSkipAfterPhase(), i); + } + return changed; +} + bool MEModuleDoAsan::PhaseRun(maple::MIRModule &m) { bool changed = false; // TODO: We have not instrumented global values - // TODO: ModuleAddressSanitizer AsanModule(m); - // TODO: AsanModule.instrumentModule(); auto &compFuncList = m.GetFunctionList(); auto admMempool = AllocateMemPoolInPhaseManager("ASAN phase manager's analysis data manager mempool"); auto *serialADM = GetManagerMemPool()->New(*(admMempool.get())); + CHECK_FATAL(AsanBlockListPtr == nullptr, "AsanBlockListPtr should be nullptr"); + AsanBlockListPtr = m.GetMemPool()->New(m); ClearAllPhases(); DoPhasesPopulate(m); + bool isSanRazorEnabled = SanRazorEnabled(); + ModuleAddressSanitizer AsanModule(m); + AsanModulePtr = &AsanModule; + if (isSanRazorEnabled) { + InitializeSanRazorGlobalNames(AsanModule); + if (SanRazorIs2ndCompile) { + SanRazorCheckIDMap.clear(); + LoadSanRazorCheckIDMap(SanRazorCheckIDMapFileName); + LoadSanRazorLog(); + LoadAllSanRazorStmtKeyCheckIDMaps(); + InitializeOrderedStmtKeyCountPairs(); + ASAPMode mode = ASAPMode::NUM; + LoadASAPThreshold(mode); + DecideToBeRemoved(mode); + SetThisFileASAPToRemoveStmtIDs(AsanModule.GetSrcFilePath()); + } + } SetQuiet(MeOption::quiet); size_t i = 0; for (auto &func : std::as_const(compFuncList)) { @@ -58,23 +109,33 @@ bool MEModuleDoAsan::PhaseRun(maple::MIRModule &m) { if (!IsQuiet()) { LogInfo::MapleLogger() << ">>>>>>>>>>>>>>>>>>>>>>>>>>>>> Sanitize Function < " << func->GetName() << " id=" << func->GetPuidxOrigin() << " >---\n"; - /* prepare me func */ - auto meFuncMP = std::make_unique(memPoolCtrler, "maple_san per-function mempool"); - auto meFuncStackMP = std::make_unique(memPoolCtrler, ""); - MemPool 
*versMP = new ThreadLocalMemPool(memPoolCtrler, "first verst mempool"); - MeFunction &meFunc = *(meFuncMP->New(&m, func, meFuncMP.get(), *meFuncStackMP, versMP, meInput)); - func->SetMeFunc(&meFunc); - meFunc.PartialInit(); - if (!IsQuiet()) { - LogInfo::MapleLogger() << "---Preparing Function < " << func->GetName() << " > [" << i - 1 << "] ---\n"; - } - meFunc.Prepare(); - changed = FuncLevelRun(meFunc, *serialADM); - meFunc.Release(); - serialADM->EraseAllAnalysisPhase(); } + /* prepare me func */ + auto meFuncMP = std::make_unique(memPoolCtrler, "maple_san per-function mempool"); + auto meFuncStackMP = std::make_unique(memPoolCtrler, ""); + MemPool *versMP = new ThreadLocalMemPool(memPoolCtrler, "first verst mempool"); + MeFunction &meFunc = *(meFuncMP->New(&m, func, meFuncMP.get(), *meFuncStackMP, versMP, meInput)); + func->SetMeFunc(&meFunc); + meFunc.PartialInit(); + if (!IsQuiet()) { + LogInfo::MapleLogger() << "---Preparing Function < " << func->GetName() << " > [" << i - 1 << "] ---\n"; + } + meFunc.Prepare(); + changed |= FuncLevelRun(meFunc, *serialADM); + meFunc.Release(); + serialADM->EraseAllAnalysisPhase(); + } + + // we add the call __asan_init via __cxx_global_var_init at last + AsanModule.instrumentModule(); + if (isSanRazorEnabled && !SanRazorIs2ndCompile) { + AddASanRazorSignalHandlerRegister(AsanModule); + AddSanRazorAppendInfo(AsanModule); + SetSanRazorGlobals(AsanModule); + DumpSanRazorCheckIDMap(SanRazorCheckIDMapFileName); } m.Emit("comb.san.mpl"); + AsanModulePtr = nullptr; return changed; } @@ -82,8 +143,10 @@ MAPLE_TRANSFORM_PHASE_REGISTER_CANSKIP(MEModuleDoAsan, doModuleAsan) MAPLE_TRANSFORM_PHASE_REGISTER_CANSKIP(MEDoAsan, doAsan); MAPLE_ANALYSIS_PHASE_REGISTER(MEDoVarCheck, doAsanVarCheck); +MAPLE_ANALYSIS_PHASE_REGISTER(MEDoNeighborOpt, doNeighborOpt); +MAPLE_TRANSFORM_PHASE_REGISTER_CANSKIP(MEASanRazor, doAsanRazor); MAPLE_TRANSFORM_PHASE_REGISTER_CANSKIP(MEDoUbsanBound, doUbsanBound); } // namespace maple -#endif \ No newline at 
end of file +#endif diff --git a/src/mapleall/maple_san/src/ubsan_bounds.cpp b/src/mapleall/maple_san/src/ubsan_bounds.cpp index ee09f10be4234e1677a01da1507fc2487424f566..21d9398f538d28bf1c7cfd0bf7c1d93b16d483df 100644 --- a/src/mapleall/maple_san/src/ubsan_bounds.cpp +++ b/src/mapleall/maple_san/src/ubsan_bounds.cpp @@ -7,6 +7,7 @@ #include "me_function.h" #include "mir_builder.h" #include "san_common.h" +#include "mir_nodes.h" namespace maple { @@ -99,8 +100,7 @@ namespace maple { continue; } ArrayNode *arrayNode = dynamic_cast(iassign->Opnd(0)); - MIRArrayType *tmpArrayType = dynamic_cast(arrayNode-> - GetArrayType(GlobalTables::GetTypeTable())); + MIRArrayType *tmpArrayType = dynamic_cast(arrayNode->GetArrayType()); ArrayInfo arrayInfo(stmtNode, tmpArrayType, arrayNode); MIRType *mirType = GlobalTables::GetTypeTable().GetTypeFromTyIdx(iassign->GetTyIdx()); @@ -122,8 +122,7 @@ namespace maple { continue; } ArrayNode *arrayNode = dynamic_cast(iread->Opnd(0)); - MIRArrayType *tmpArrayType = dynamic_cast(arrayNode-> - GetArrayType(GlobalTables::GetTypeTable())); + MIRArrayType *tmpArrayType = dynamic_cast(arrayNode->GetArrayType()); ArrayInfo arrayInfo(stmtNode, tmpArrayType, arrayNode); MIRType *mirType = GlobalTables::GetTypeTable().GetTypeFromTyIdx(iread->GetTyIdx()); diff --git a/src/mrt/libsan/CMakeLists.txt b/src/mrt/libsan/CMakeLists.txt index ce683408f97c470bb36f79d09acffc02b606d5d8..b2bc27aeeee568760c1c67211bc850b823335b49 100644 --- a/src/mrt/libsan/CMakeLists.txt +++ b/src/mrt/libsan/CMakeLists.txt @@ -1,58 +1,59 @@ cmake_minimum_required(VERSION 3.23.0) set(MAPLE_ROOT $ENV{MAPLE_ROOT}) -set(MAPLE_MRT_ROOT ${MAPLE_ROOT}/src/mrt) - -set(LINARO_CXX "${MAPLE_ROOT}/tools/gcc-linaro-7.5.0") -set(CMAKE_CXX_COMPILER "${LINARO_CXX}/bin/aarch64-linux-gnu-g++") +set(LINARO ${MAPLE_ROOT}/tools/gcc-linaro-7.5.0) set(inc_dirs -${MAPLE_MRT_ROOT}/libsan/asan -${MAPLE_MRT_ROOT}/libsan/interception -${LINARO_CXX}/aarch64-linux-gnu/include/c++/7.5.0 
-${LINARO_CXX}/aarch64-linux-gnu/include/c++/7.5.0/aarch64-linux-gnu -${LINARO_CXX}/aarch64-linux-gnu/libc/usr/include -${LINARO_CXX}/lib/gcc/aarch64-linux-gnu/7.5.0/include -${LINARO_CXX}/aarch64-linux-gnu/libc/usr/include/linux +${MAPLE_ROOT}/src/mrt/libsan/asan +${MAPLE_ROOT}/src/mrt/libsan/interception +${LINARO}/aarch64-linux-gnu/include/c++/7.5.0 +${LINARO}/aarch64-linux-gnu/include/c++/7.5.0/aarch64-linux-gnu +${LINARO}/aarch64-linux-gnu/libc/usr/include +${LINARO}/lib/gcc/aarch64-linux-gnu/7.5.0/include +${LINARO}/aarch64-linux-gnu/libc/usr/include/linux ) set(src_libsan -${MAPLE_MRT_ROOT}/libsan/asan/asan_interceptors.cpp -${MAPLE_MRT_ROOT}/libsan/asan/asan_report.cpp -${MAPLE_MRT_ROOT}/libsan/asan/sanitizer_libc.cpp -${MAPLE_MRT_ROOT}/libsan/asan/asan_stubs.cpp -${MAPLE_MRT_ROOT}/libsan/asan/asan_malloc_linux.cpp -${MAPLE_MRT_ROOT}/libsan/asan/sanitizer_termination.cpp -${MAPLE_MRT_ROOT}/libsan/asan/asan_flags.cpp -${MAPLE_MRT_ROOT}/libsan/asan/sanitizer_flags.cpp -${MAPLE_MRT_ROOT}/libsan/asan/asan_rtl.cpp -${MAPLE_MRT_ROOT}/libsan/asan/sanitizer_printf.cpp -${MAPLE_MRT_ROOT}/libsan/asan/asan_shadow_memory.cpp -${MAPLE_MRT_ROOT}/libsan/asan/sanitizer_linux_syscall.cpp -${MAPLE_MRT_ROOT}/libsan/asan/asan_alloctor.cpp -${MAPLE_MRT_ROOT}/libsan/asan/asan_mapping.cpp -${MAPLE_MRT_ROOT}/libsan/asan/asan_fake_stack.cpp -${MAPLE_MRT_ROOT}/libsan/asan/asan_interceptors_memintrinsics.cpp -${MAPLE_MRT_ROOT}/libsan/asan/asan_signal_handler_linux.cpp -${MAPLE_MRT_ROOT}/libsan/asan/asan_poisoning.cpp -${MAPLE_MRT_ROOT}/libsan/asan/asan_avltree.cpp -${MAPLE_MRT_ROOT}/libsan/interception/interception_linux.cpp +asan/asan_interceptors.cpp +asan/asan_report.cpp +asan/sanitizer_libc.cpp +asan/asan_stubs.cpp +asan/asan_malloc_linux.cpp +asan/sanitizer_termination.cpp +asan/asan_flags.cpp +asan/sanitizer_flags.cpp +asan/asan_rtl.cpp +asan/sanitizer_printf.cpp +asan/asan_shadow_memory.cpp +asan/sanitizer_linux_syscall.cpp +asan/asan_alloctor.cpp +asan/asan_mapping.cpp 
+asan/asan_fake_stack.cpp +asan/asan_interceptors_memintrinsics.cpp +asan/asan_signal_handler_linux.cpp +asan/asan_poisoning.cpp +asan/asan_avltree.cpp +interception/interception_linux.cpp ) -set(CMAKE_CXX_FLAGS "-U __SIZEOF_INT128__ -pthread -ldl -nostdinc -fno-exceptions") +add_library(asan-dynamic SHARED ${src_libsan}) +add_library(asan-static STATIC ${src_libsan}) + +# set(CMAKE_C_COMPILER ${MAPLE_ROOT}/output/tools/gcc-linaro-7.5.0/bin/aarch64-linux-gnu-gcc) +# set(CMAKE_CXX_COMPILER ${MAPLE_ROOT}/output/tools/gcc-linaro-7.5.0/bin/aarch64-linux-gnu-g++) -add_library(asan-dynamic-rt SHARED ${src_libsan}) -add_library(asan-static-rt STATIC ${src_libsan}) +set(CMAKE_C_COMPILER ${LINARO}/bin/aarch64-linux-gnu-gcc) +set(CMAKE_CXX_COMPILER ${LINARO}/bin/aarch64-linux-gnu-g++) -set_target_properties(asan-dynamic-rt PROPERTIES - COMPILE_FLAGS "" +set_target_properties(asan-dynamic PROPERTIES + COMPILE_FLAGS "-O3 -g -nostdinc -nostdinc++ -fno-exceptions" INCLUDE_DIRECTORIES "${inc_dirs}" LINK_LIBRARIES "" OUTPUT_NAME "asan" ) -set_target_properties(asan-static-rt PROPERTIES - COMPILE_FLAGS "" +set_target_properties(asan-static PROPERTIES + COMPILE_FLAGS "-O3 -g -nostdinc -nostdinc++ -fno-exceptions" INCLUDE_DIRECTORIES "${inc_dirs}" LINK_LIBRARIES "" OUTPUT_NAME "asan" diff --git a/src/mrt/libsan/README.md b/src/mrt/libsan/README.md new file mode 100644 index 0000000000000000000000000000000000000000..33841c32f707468d8a275c284e633aef02b30565 --- /dev/null +++ b/src/mrt/libsan/README.md @@ -0,0 +1,6 @@ +# BiSheng ASAN runtime +To build the runtime library +``` +mkdir build && cd build && cmake .. 
&& make -j +``` + diff --git a/src/mrt/libsan/asan/asan_alloctor.cpp b/src/mrt/libsan/asan/asan_alloctor.cpp index 9469fa477cdf6784b311be37c67e785b72caf4fa..6b45cb41aec207967592edc6c079d33683beb744 100644 --- a/src/mrt/libsan/asan/asan_alloctor.cpp +++ b/src/mrt/libsan/asan/asan_alloctor.cpp @@ -26,6 +26,7 @@ void AsanChunkTree::ClearAllChunks() { VReport(ASAN_LOG_DEBUG, "Free all AsanChunks\n"); if (root != nullptr) FreeSubTree(root); + root = nullptr; } AsanChunkTree::~AsanChunkTree() { @@ -68,34 +69,6 @@ AsanChunk* AsanChunkTree::FindChunkContainsAddr(AsanChunk* node, uptr addr) cons } } -AsanChunk* AsanChunkTree::FindChunkAtLeft(uptr addr) const { - AsanChunk* pre = nullptr; - AsanChunk* cur = root; - while (cur != nullptr) { - if (cur->end_addr < addr) { - // cur is at the left - pre = cur; - cur = cur->right; - } - else if (cur->beg_addr > addr) { - // cur is at the right - pre = cur; - cur = cur->left; - } - else { - // addr in cur - return cur; - } - } - // Make sure [pre] is just at the left of addr - if (pre == nullptr) pre = cur; - if (pre->beg_addr > addr) return nullptr; - while (pre->right != nullptr && pre->right->beg_addr <= addr) { - pre = pre->right; - } - return pre; -} - AsanChunk* AsanChunkTree::GetListLeftAsanChunk(const AsanChunk* chunk) const { if (chunk == nullptr) return nullptr; if (chunk->left != nullptr) { @@ -103,7 +76,14 @@ AsanChunk* AsanChunkTree::GetListLeftAsanChunk(const AsanChunk* chunk) const { } if (chunk->father == nullptr) return nullptr; if (chunk->father->right == chunk) return chunk->father; - return nullptr; + // return nullptr; + AsanChunk* cur = chunk->father; + AsanChunk* ancestor = cur->father; + while (ancestor != nullptr && ancestor->left == cur) { + cur = ancestor; + ancestor = cur->father; + } + return ancestor; } AsanChunk* AsanChunkTree::GetListRightAsanChunk(const AsanChunk* chunk) const { @@ -113,25 +93,14 @@ AsanChunk* AsanChunkTree::GetListRightAsanChunk(const AsanChunk* chunk) const { } if (chunk->father 
== nullptr) return nullptr; if (chunk->father->left == chunk) return chunk->father; - return nullptr; -} - -int AsanChunkTree::PrepareToInsertChunk(const AsanChunk* chunk) { - AsanChunk* right_cur = FindChunkAtLeft(chunk->end_addr); - - // no overlap - if (right_cur == nullptr) return 0; - if (right_cur->end_addr < chunk->beg_addr) return 0; - - // remove overlaped chunks - AsanChunk* cur = right_cur; - AsanChunk* next_cur = nullptr; - while (cur != nullptr && cur->end_addr >= chunk->beg_addr) { - CHECK(cur->IsPoisonedChunk()); // Make sure it is a poisoned chunk - next_cur = GetListLeftAsanChunk(cur); - DeleteChunk(cur); - cur = next_cur; + // return nullptr; + AsanChunk* cur = chunk->father; + AsanChunk* ancestor = cur->father; + while (ancestor != nullptr && ancestor->right == cur) { + cur = ancestor; + ancestor = cur->father; } + return ancestor; } void AsanChunkTree::InsertChunk(AsanChunk* chunk) { diff --git a/src/mrt/libsan/asan/asan_alloctor.h b/src/mrt/libsan/asan/asan_alloctor.h index aeeb7553049568565ba221d53dbfaf58759ff0d7..cdfd95615cfa1a269cfaab0f021d46102932062f 100644 --- a/src/mrt/libsan/asan/asan_alloctor.h +++ b/src/mrt/libsan/asan/asan_alloctor.h @@ -25,8 +25,6 @@ public: AsanChunk* GetListLeftAsanChunk(const AsanChunk* chunk) const; AsanChunk* GetListRightAsanChunk(const AsanChunk* chunk) const; - AsanChunk* FindChunkAtLeft(uptr addr) const; - int PrepareToInsertChunk(const AsanChunk* chunk); void InsertChunk(AsanChunk* chunk); void DeleteChunk(AsanChunk* chunk); AsanChunk* CreateLeftRedzoneChunk(const AsanChunk* chunk); diff --git a/src/mrt/libsan/asan/asan_fake_stack.cpp b/src/mrt/libsan/asan/asan_fake_stack.cpp index 682816f6209dd78feeed62cd7bc4b8d4a8da1726..4c4f4eb20aa947cf7dbf563c39d891746af14d5f 100644 --- a/src/mrt/libsan/asan/asan_fake_stack.cpp +++ b/src/mrt/libsan/asan/asan_fake_stack.cpp @@ -3,7 +3,7 @@ #include "asan_internal.h" #include "asan_alloctor.h" #include "sanitizer_libc.h" - +#include "asan_report.h" namespace __sanitizer { 
@@ -30,7 +30,18 @@ SANITIZER_INTERFACE_ATTRIBUTE void __asan_alloca_poison(uptr addr, uptr size) { SANITIZER_INTERFACE_ATTRIBUTE void __asan_allocas_unpoison(uptr top, uptr bottom) { if ((!top) || (top > bottom)) return; - internal_memset((void*)top, 0, (bottom - top) / ASAN_SHADOW_GRANULARITY); + internal_memset((void*)MEM_TO_SHADOW(top), 0, (bottom - top) / ASAN_SHADOW_GRANULARITY); +} + +SANITIZER_INTERFACE_ATTRIBUTE +void __asan_check_stack(uptr top, uptr bottom) { + if ((!top) || (top > bottom)) return; + CheckAndReport((void*)top, bottom - top, false); +} + +SANITIZER_INTERFACE_ATTRIBUTE +void __asan_check_stack2(uptr top, uptr size) { + CheckAndReport((void*)top, size, false); } } diff --git a/src/mrt/libsan/asan/asan_interceptors.cpp b/src/mrt/libsan/asan/asan_interceptors.cpp index 844ac4b2b4b927792d01ff21cb7a5b96ef9dcd2c..5089d0fb5f18b98494f0bb16add4d412a4d2ebd2 100644 --- a/src/mrt/libsan/asan/asan_interceptors.cpp +++ b/src/mrt/libsan/asan/asan_interceptors.cpp @@ -16,7 +16,7 @@ using namespace __sanitizer; extern "C" { void InitializeAsanInterceptors() { - static bool was_called_once; + static bool was_called_once = false; ASSERT(!was_called_once, "__sanitizer::InitializeAsanInterceptors was called again"); was_called_once = true; @@ -30,6 +30,9 @@ void InitializeAsanInterceptors() { ASAN_INTERCEPT_FUNC(memset); ASAN_INTERCEPT_FUNC(memmove); + // InitializeCommonInterceptors(); + // InitializeSignalInterceptors(); + // Intercept str* functions. 
ASAN_INTERCEPT_FUNC(memcmp); ASAN_INTERCEPT_FUNC(strlen); @@ -56,8 +59,7 @@ void InitializeAsanInterceptors() { ASAN_INTERCEPT_FUNC_WITH_ADDR(strstr, __asan_strstr_1); char* (*__asan_strstr_2)(char *, const char *) = &strstr; ASAN_INTERCEPT_FUNC_WITH_ADDR(strstr, __asan_strstr_2); - // VReport(1, "AddressSanitizer: libc interceptors initialized\n"); } -} \ No newline at end of file +} diff --git a/src/mrt/libsan/asan/asan_malloc_linux.cpp b/src/mrt/libsan/asan/asan_malloc_linux.cpp index 1885839ad1ae10ce4ef5aba467e5fa7c5d03e782..1b528c76f052a30b706370f983be6811d04a489b 100644 --- a/src/mrt/libsan/asan/asan_malloc_linux.cpp +++ b/src/mrt/libsan/asan/asan_malloc_linux.cpp @@ -20,8 +20,10 @@ INTERCEPTOR(void, free, void *ptr) { if (asan_inited) { VReport(ASAN_LOG_DEBUG, "free %p\n", ptr); allocatorPtr->DoFree(ptr); + REAL(free(ptr)); } - REAL(free(ptr)); + // All memory allocated before __asan_init() will not be freed + // Anyway, it is mere 1 page (see ASAN_PAGE_SIZE) } INTERCEPTOR(void*, malloc, uptr size) { diff --git a/src/mrt/libsan/asan/asan_mapping.h b/src/mrt/libsan/asan/asan_mapping.h index 4a8dba1149e01e0eb6674feb198e992b5bbbf00f..5c171409060003f2cfad149176348dec9366742f 100644 --- a/src/mrt/libsan/asan/asan_mapping.h +++ b/src/mrt/libsan/asan/asan_mapping.h @@ -13,6 +13,7 @@ namespace __sanitizer { // #define ASAN_PAGE_SIZE 4096 extern uptr ASAN_PAGE_SIZE; // initialize while building shadow memory extern uptr ASAN_TOTAL_PHYS_PAGES; +void InitializePageSize(); // We add an extra page before and after the // kLowShadowBeg and KHighShadowEnd diff --git a/src/mrt/libsan/asan/asan_report.cpp b/src/mrt/libsan/asan/asan_report.cpp index a6209da972a7e92e58bd9d0cbb1cbf62908a6b88..be5516900cd4342f2cbfdf694f139867be45de00 100644 --- a/src/mrt/libsan/asan/asan_report.cpp +++ b/src/mrt/libsan/asan/asan_report.cpp @@ -102,7 +102,10 @@ void PrintMagicValueRoutine() { Printf(buff); } +static bool in_reporting = false; void ReportGenericError(uptr pc, uptr bp, uptr sp, 
uptr addr, bool is_write, uptr access_size, u32 exp, bool fatal) { + if (in_reporting) return; + in_reporting = true; if (__asan_test_only_reported_buggy_pointer) { *__asan_test_only_reported_buggy_pointer = addr; return; @@ -145,6 +148,7 @@ void ReportFreeNotMalloced(uptr pc, uptr bp, uptr sp, uptr addr) { } void CheckAndReport(const void *membeg, uptr size, bool is_write) { + if (!asan_inited) return; if (allocatorPtr->shadow->IsPoisonedMem(membeg, size)) { uptr err_addr = allocatorPtr->shadow->GetPoisonedAddr(membeg, size); GET_CALLER_PC_BP_SP; @@ -169,7 +173,12 @@ void ReportErrorInfo(uptr pc, uptr bp, uptr sp, uptr addr, const char *behavior, Printf(buff); } +static bool in_reporting_stack = false; void ReportStackTrace() { + // We may get error in this function, + // avoid recursive call in here + if (in_reporting_stack) return; + in_reporting_stack = true; static uptr stack_depth = 128; void *callstack[stack_depth]; size_t frames = backtrace(callstack, stack_depth); @@ -182,6 +191,8 @@ void ReportStackTrace() { } Printf("\n"); REAL(free(symbols)); + // report once, never reset to false + // in_reporting_stack = false; } void ReportCapturedDeadlySignal(int signal, uptr pc, uptr bp, uptr sp, SignalContext sc) { diff --git a/src/mrt/libsan/asan/asan_shadow_memory.cpp b/src/mrt/libsan/asan/asan_shadow_memory.cpp index 2dd3cc8892448990107dedfbbefa3a539565e898..a8dafeb1d111f71a2012da8d75c6cf724624bb59 100644 --- a/src/mrt/libsan/asan/asan_shadow_memory.cpp +++ b/src/mrt/libsan/asan/asan_shadow_memory.cpp @@ -53,7 +53,17 @@ int AsanShadowMem::InitShadow() { scale = ASAN_SHADOW_SCALE; scale_factor = 1 << scale; half_scale_factor = 1 << (scale - 1); + // Initialized in __asan_init + // kHighMemEnd = 0x7fffffffffffULL; + // kMidMemBeg = 0x3000000000ULL; + // kMidMemEnd = 0x4fffffffffULL; + // mmap to file + // fd = MmapFixed(beg, size, 0); + // low_mmap_fd = MmapFixed(kLowShadowBeg, kLowShadowEnd - kLowShadowBeg + 1, 0, "low_shadow"); + // high_mmap_fd = 
MmapFixed(kHighShadowBeg, kHighShadowEnd - kHighShadowBeg + 1, 0, "high_shadow"); + + // A crucial work uptr mmap_size = kLowShadowEnd - kLowShadowBeg + 1; uptr start_addr = kLowShadowBeg; char buff[30] = {0}; @@ -268,6 +278,10 @@ int AsanShadowMem::UnpoisonMem(const void *membeg, uptr size) { uptr shadowend = shadowbeg + shadowSize - 1; if (shadowSize > 1) { + // su_t firstByteValue = *(su_t*)(shadowbeg); + // if (begAffectedBytes != scale_factor) { + // firstByteValue = UnpoisonUnitTailBytes(firstByteValue, begAffectedBytes); + // } su_t firstByteValue = 0; // must be set to 0 su_t lastByteValue = 0; diff --git a/src/mrt/libsan/asan/sanitizer_libc.cpp b/src/mrt/libsan/asan/sanitizer_libc.cpp index 904eaf5f3b359edf5fc4412ff70090d1ada928ce..76b3bfcfbe2954be8a2ebb0351ef4d62774dc377 100644 --- a/src/mrt/libsan/asan/sanitizer_libc.cpp +++ b/src/mrt/libsan/asan/sanitizer_libc.cpp @@ -15,6 +15,10 @@ void* internal_malloc(uptr size) { static uptr offset = ASAN_SHADOW_OFFSET - 4096; static bool have_mmaped = false; if (!have_mmaped) { + if (offset == 0) { + offset = ASAN_SHADOW_OFFSET - 4096; + } + InitializePageSize(); MmapFixed(offset, 4096, 0, "internal_malloc"); } if (offset + size >= ASAN_SHADOW_OFFSET) { @@ -38,10 +42,6 @@ void* internal_realloc(void* addr, uptr size) { Die(); } -/** - * The following functions are part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 
- * **/ - s64 internal_atoll(const char *nptr) { return internal_simple_strtoll(nptr, nullptr, 10); } @@ -101,9 +101,7 @@ void *internal_memrchr(const void *s, int c, uptr n) { const char *t = (const char *)s; void *res = nullptr; for (uptr i = 0; i < n; ++i, ++t) { - if (*t == c) { - res = reinterpret_cast(const_cast(t)); - } + if (*t == c) res = reinterpret_cast(const_cast(t)); } return res; } @@ -111,20 +109,17 @@ void *internal_memrchr(const void *s, int c, uptr n) { int internal_memcmp(const void* s1, const void* s2, uptr n) { const char *t1 = (const char *)s1; const char *t2 = (const char *)s2; - for (uptr i = 0; i < n; ++i, ++t1, ++t2) { - if (*t1 != *t2) { + for (uptr i = 0; i < n; ++i, ++t1, ++t2) + if (*t1 != *t2) return *t1 < *t2 ? -1 : 1; - } - } return 0; } void *internal_memcpy(void *dest, const void *src, uptr n) { char *d = (char*)dest; const char *s = (const char *)src; - for (uptr i = 0; i < n; ++i) { + for (uptr i = 0; i < n; ++i) d[i] = s[i]; - } return dest; } @@ -134,9 +129,8 @@ void *internal_memmove(void *dest, const void *src, uptr n) { sptr i, signed_n = (sptr)n; CHECK_GE(signed_n, 0); if (d < s) { - for (i = 0; i < signed_n; ++i) { - d[i] = s[i]; - } + for (i = 0; i < signed_n; ++i) + d[i] = s[i]; } else { if (d > s && signed_n > 0) { for (i = signed_n - 1; i >= 0; --i) { @@ -174,9 +168,8 @@ void *internal_memset(void* s, int c, uptr n) { uptr internal_strcspn(const char *s, const char *reject) { uptr i; for (i = 0; s[i]; i++) { - if (internal_strchr(reject, s[i])) { + if (internal_strchr(reject, s[i])) return i; - } } return i; } @@ -225,25 +218,22 @@ char* internal_strchr(const char *s, int c) { char *internal_strchrnul(const char *s, int c) { char *res = internal_strchr(s, c); - if (!res) { + if (!res) res = const_cast(s) + internal_strlen(s); - } return res; } char *internal_strrchr(const char *s, int c) { const char *res = nullptr; for (uptr i = 0; s[i]; i++) { - if (s[i] == c) { - res = s + i; - } + if (s[i] == c) res = s + i; } 
return const_cast(res); } uptr internal_strlen(const char *s) { uptr i = 0; - while (s[i] != '\0') i++; + while (s[i]) i++; return i; } @@ -263,9 +253,8 @@ uptr internal_strlcat(char *dst, const char *src, uptr maxlen) { char *internal_strncat(char *dst, const char *src, uptr n) { uptr len = internal_strlen(dst); uptr i; - for (i = 0; i < n && src[i]; i++) { + for (i = 0; i < n && src[i]; i++) dst[len + i] = src[i]; - } dst[len + i] = 0; return dst; } @@ -291,12 +280,12 @@ char *internal_strncpy(char *dst, const char *src, uptr n) { uptr internal_strnlen(const char *s, uptr maxlen) { uptr i = 0; - while (i < maxlen && s[i] != '\0') i++; + while (i < maxlen && s[i]) i++; return i; } char *internal_strstr(const char *haystack, const char *needle) { - // This is O(N^2), NEVER use it in hot place!!!!! + // This is O(N^2), but we are not using it in hot places. uptr len1 = internal_strlen(haystack); uptr len2 = internal_strlen(needle); if (len1 < len2) return nullptr; @@ -364,11 +353,9 @@ bool mem_is_zero(const char *beg, uptr size) { all |= *aligned_beg; // Epilogue. 
if ((char *)aligned_end >= beg) { - for (const char *mem = (char *)aligned_end; mem < end; mem++) { - all |= *mem; - } + for (const char *mem = (char *)aligned_end; mem < end; mem++) all |= *mem; } return all == 0; } -} \ No newline at end of file +} diff --git a/src/mrt/libsan/asan/sanitizer_termination.cpp b/src/mrt/libsan/asan/sanitizer_termination.cpp index b482455f14947b32daa88abce4c9189ad1c15e78..02531edbb65944085d7043b86caa03b67db8d8dc 100644 --- a/src/mrt/libsan/asan/sanitizer_termination.cpp +++ b/src/mrt/libsan/asan/sanitizer_termination.cpp @@ -39,11 +39,6 @@ bool RemoveDieCallback(DieCallbackType callback) { void NORETURN Die() { VReport(ASAN_LOG_DEBUG, "Dieing\n"); - // clear shadow and allocator - if (allocatorPtr != nullptr) { - allocatorPtr->OnDelete(); - REAL(free(allocatorPtr)); - } // TODO: support user callback for (int i = kMaxNumOfInternalDieCallbacks - 1; i >= 0; i--) { if (InternalDieCallbacks[i]) @@ -52,6 +47,11 @@ void NORETURN Die() { if (common_flags()->abort_on_error) Abort(); exit(common_flags()->exitcode); + // no need to clear shadow and allocator + // if (allocatorPtr != nullptr) { + // allocatorPtr->OnDelete(); + // REAL(free(allocatorPtr)); + // } } void NORETURN CheckFailed(const char *file, int line, const char *cond, diff --git a/src/mrt/sanrazor/.gitignore b/src/mrt/sanrazor/.gitignore new file mode 100644 index 0000000000000000000000000000000000000000..cbbd7a77a642419b61c917a01f5e5b65701c7cc7 --- /dev/null +++ b/src/mrt/sanrazor/.gitignore @@ -0,0 +1,2 @@ +*.o +build diff --git a/src/mrt/sanrazor/CMakeLists.txt b/src/mrt/sanrazor/CMakeLists.txt new file mode 100644 index 0000000000000000000000000000000000000000..eb8a1a28e12aa9c3c46ddf998c00c5149a91b8d0 --- /dev/null +++ b/src/mrt/sanrazor/CMakeLists.txt @@ -0,0 +1,36 @@ +cmake_minimum_required(VERSION 3.23.0) + +set(MAPLE_ROOT $ENV{MAPLE_ROOT}) + +set(inc_dirs +${MAPLE_ROOT}/src/mrt/sanrazor/include +${MAPLE_ROOT}/tools/gcc-linaro-7.5.0/aarch64-linux-gnu/include/c++/7.5.0 
+${MAPLE_ROOT}/tools/gcc-linaro-7.5.0/aarch64-linux-gnu/include/c++/7.5.0/aarch64-linux-gnu +${MAPLE_ROOT}/tools/gcc-linaro-7.5.0/aarch64-linux-gnu/libc/usr/include +${MAPLE_ROOT}/tools/gcc-linaro-7.5.0/lib/gcc/aarch64-linux-gnu/7.5.0/include +${MAPLE_ROOT}/tools/gcc-linaro-7.5.0/aarch64-linux-gnu/libc/usr/include/linux +) + +set(src_libsanrazor +${MAPLE_ROOT}/src/mrt/sanrazor/src/sanrazor.c +) + +add_library(sanrazor-dynamic SHARED ${src_libsanrazor}) +add_library(sanrazor-static STATIC ${src_libsanrazor}) + +set(CMAKE_C_COMPILER ${MAPLE_ROOT}/tools/gcc-linaro-7.5.0/bin/aarch64-linux-gnu-gcc) +# set(CMAKE_CXX_COMPILER ${MAPLE_ROOT}/tools/gcc-linaro-7.5.0/bin/aarch64-linux-gnu-g++) + +set_target_properties(sanrazor-dynamic PROPERTIES + COMPILE_FLAGS "-O0 -g -nostdinc -nostdinc++ -fno-exceptions -DENABLE_MAPLE_SAN" + INCLUDE_DIRECTORIES "${inc_dirs}" + LINK_LIBRARIES "" + OUTPUT_NAME "sanrazor" +) + +set_target_properties(sanrazor-static PROPERTIES + COMPILE_FLAGS "-O0 -g -nostdinc -nostdinc++ -fno-exceptions -DENABLE_MAPLE_SAN" + INCLUDE_DIRECTORIES "${inc_dirs}" + LINK_LIBRARIES "" + OUTPUT_NAME "sanrazor" +) diff --git a/src/mrt/sanrazor/README.md b/src/mrt/sanrazor/README.md new file mode 100644 index 0000000000000000000000000000000000000000..b195900b39fda6eb0e31d0fec6b88ec3932ff490 --- /dev/null +++ b/src/mrt/sanrazor/README.md @@ -0,0 +1,6 @@ +# SanRazor Profiling runtime +To build the runtime library +``` +mkdir build && cd build && cmake .. 
&& make +``` + diff --git a/src/mrt/sanrazor/include/sanrazor.h b/src/mrt/sanrazor/include/sanrazor.h new file mode 100644 index 0000000000000000000000000000000000000000..3f17a665335dd44bc3092cb77f8c35ae40b36725 --- /dev/null +++ b/src/mrt/sanrazor/include/sanrazor.h @@ -0,0 +1,20 @@ +#define BOOL char +#define true 1 +#define false 0 + +// we design a linked list to record the files +struct SanSrcFileCovInfo { + char* filename; + uint64_t* array; + size_t size; + void* next; +}; + +void __san_cov_dump(); +void __san_cov_register(); +void __init_san_cov_srcfile(struct SanSrcFileCovInfo* info); +void __san_cov_signal_handler(int sig); +void __san_cov_abort(); +void __san_cov_append_info(uint64_t* array, BOOL *is_initialized, char* filename, size_t size); +void __san_cov_trace_pc(uint64_t* array, size_t id, BOOL *is_initialized, char* filename, size_t size); +void __san_cov_trace_pc2(uint64_t* array, size_t id); \ No newline at end of file diff --git a/src/mrt/sanrazor/src/sanrazor.c b/src/mrt/sanrazor/src/sanrazor.c new file mode 100644 index 0000000000000000000000000000000000000000..11bab30d3b4f64c63abc017d9fded3bdaf828ead --- /dev/null +++ b/src/mrt/sanrazor/src/sanrazor.c @@ -0,0 +1,161 @@ +#include +#include +#include +#include +#include +#include +#include "sanrazor.h" + +static BOOL not_registered = true; +static struct SanSrcFileCovInfo* info_list_head = NULL; +static struct SanSrcFileCovInfo* info_list_tail = NULL; + +void __asan_init(); + +char *get_dump_filename() { + char *dump_path = getenv("SANRAZOR_DUMP"); + if (dump_path == NULL) { + dump_path = (char *)malloc(1024); + snprintf(dump_path, 1024, "%s", "sanrazor.log"); + } + + size_t name_size = strlen(dump_path); + size_t log_id = 0; + while (access(dump_path, F_OK) == 0) { + snprintf(dump_path + name_size, 1024 - name_size, ".%zu", log_id); + log_id++; + } + return dump_path; +} + +void __init_san_cov_srcfile(struct SanSrcFileCovInfo* info) { + if (info_list_head == NULL) { + info_list_head = info; + 
info_list_tail = info; + } else { + info_list_tail->next = info; + info->next = NULL; + info_list_tail = info; + } +} + +void __san_free_info_list() { + struct SanSrcFileCovInfo* info = info_list_head; + struct SanSrcFileCovInfo* next = NULL; + while (info != NULL) { + next = info->next; + free(info); + info = next; + } + info_list_head = NULL; + info_list_tail = NULL; +} + +char* __san_cov_internal_ulltoa(unsigned long long value, char* str) { + static char digits[] = "0123456789abcdef"; + size_t idx = 0; + while (value > 0) { + unsigned long long tmp = (value & 0x0f); + str[idx++] = digits[tmp]; + value >>= 4; + } + if (idx == 0) { + str[0] = '0'; + str[1] = '\0'; + return str; + } + str[idx] = '\0'; + size_t len = 0; + for (char c = str[len]; c != '\0'; c = str[++len]); + // reverse str + for (size_t i = 0; i < len / 2; ++i) { + char tmp = str[i]; + str[i] = str[len - i - 1]; + str[len - i - 1] = tmp; + } + return str; +} + +void __san_cov_dump() { + struct SanSrcFileCovInfo* info = info_list_head; + if (info == NULL) { + return; + } + char *dump_path = get_dump_filename(); + fprintf(stderr, "[+] ASan report triggered - dumping log to %s\n", dump_path); + FILE *fp = fopen(dump_path, "w"); + char buff[100]; + while (info != NULL) { + for (size_t id = 0; id < info->size; ++id) { + // Do not use snprintf as it is intercepted by ASan + // snprintf(buff, 5000, "%s // %d // %d\n", info->filename, id, info->array[id]); + // fwrite(buff, strlen(buff), 1, fp); + fputs(info->filename, fp); + fputs(" // ", fp); + __san_cov_internal_ulltoa(id, buff); + fputs(buff, fp); + fputs(" // ", fp); + __san_cov_internal_ulltoa(info->array[id], buff); + fputs(buff, fp); + fputs("\n", fp); + } + info = info->next; + } + fclose(fp); + free(dump_path); + __san_free_info_list(); +} + +void __san_cov_signal_handler(int sig) { + signal(sig, SIG_DFL); + __san_cov_dump(); + raise(sig); +} + +void __san_cov_register() { + if (!not_registered) { + return; + } + /*Register the call to be called 
during function exit*/ + atexit(__san_cov_dump); + signal(SIGINT, __san_cov_signal_handler); + signal(SIGTERM, __san_cov_signal_handler); + signal(SIGABRT, __san_cov_signal_handler); + signal(SIGQUIT, __san_cov_signal_handler); + signal(SIGSEGV, __san_cov_signal_handler); + signal(SIGILL, __san_cov_signal_handler); + signal(SIGFPE, __san_cov_signal_handler); + signal(SIGBUS, __san_cov_signal_handler); + not_registered = false; + fprintf(stderr, "[+] ASan coverage report registered\n"); +} + +void __san_cov_abort() { + fprintf(stderr, "[+] ASan report triggered - flushing log and aborting\n"); + __san_cov_dump(); + abort(); +} + +void __san_cov_append_info(uint64_t* array, BOOL *is_initialized, char* filename, size_t size) { + // once the function is called before __asan_init + __asan_init(); + struct SanSrcFileCovInfo* info = (struct SanSrcFileCovInfo*)malloc(sizeof(struct SanSrcFileCovInfo)); + info->filename = filename; + info->array = array; + info->size = size; + info->next = NULL; + __init_san_cov_srcfile(info); + *is_initialized = true; + // fprintf(stderr, "[+] ASan coverage registered - %s\n", filename); +} + +void __san_cov_trace_pc(uint64_t* array, size_t id, BOOL *is_initialized, char* filename, size_t size) { + if (!*is_initialized) { + __san_cov_append_info(array, is_initialized, filename, size); + } + array[id]++; +} + +void __san_cov_trace_pc2(uint64_t* array, size_t id) { + array[id]++; +} \ No newline at end of file