diff --git a/.gitignore b/.gitignore index 25a4d8c9ee9366594bb507bf22d0f4a6918455fc..de978c4b79dfd167d80228b94f808a6e3861d1a2 100644 --- a/.gitignore +++ b/.gitignore @@ -28,4 +28,5 @@ libjava-core output compile_commands.json testsuite/tools* +test/c_test/sanitizer/juliet_test_suite *__pycache__* diff --git a/.gitmodules b/.gitmodules new file mode 100644 index 0000000000000000000000000000000000000000..a3871f9ea0c85b2a77431ca32c67e275ac96e780 --- /dev/null +++ b/.gitmodules @@ -0,0 +1,3 @@ +[submodule "test/c_test/sanitizer/juliet_test_suite"] + path = test/c_test/sanitizer/juliet_test_suite + url = https://gitee.com/huaijinwang/OpenArkCompiler_ASAN_juliet_test_suite.git diff --git a/src/mapleall/CMakeLists.txt b/src/mapleall/CMakeLists.txt index a581092ca23f035569113e65834fe799ea64a822..0cbbb2690b0b444d4c75b9491088ef4b0452d3a8 100644 --- a/src/mapleall/CMakeLists.txt +++ b/src/mapleall/CMakeLists.txt @@ -30,6 +30,10 @@ if(NOT ${GIT_REVISION} STREQUAL "") set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -DGIT_REVISION=\\\"${GIT_REVISION}\\\"") endif() +OPTION(ENABLE_MAPLE_SAN "Enabling sanitizer functionalities" OFF) +if(ENABLE_MAPLE_SAN) + set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -DENABLE_MAPLE_SAN=1") +endif(ENABLE_MAPLE_SAN) add_subdirectory(maple_util) add_subdirectory(mempool) @@ -41,3 +45,4 @@ add_subdirectory(maple_be) add_subdirectory(maple_driver) add_subdirectory(maple_ir) add_subdirectory(maple_pgo) +add_subdirectory(maple_san) \ No newline at end of file diff --git a/src/mapleall/maple_be/CMakeLists.txt b/src/mapleall/maple_be/CMakeLists.txt index 2c560ef5f76befdb7e03d5b05a5de550c475677c..3b103a8d3a35f2f8df89e4329d83550ba1d9013f 100755 --- a/src/mapleall/maple_be/CMakeLists.txt +++ b/src/mapleall/maple_be/CMakeLists.txt @@ -32,6 +32,7 @@ set(deps_maple libmplmewpo libmpl2mpl libmaple + libmplsan ) set(inc_dirs diff --git a/src/mapleall/maple_driver/include/driver_options.h b/src/mapleall/maple_driver/include/driver_options.h index 
8a67d6184810e3f621bed6d0e410ce119650eb51..d050ace03da37a5f4da6fe7ef9698afa2564b9e5 100644 --- a/src/mapleall/maple_driver/include/driver_options.h +++ b/src/mapleall/maple_driver/include/driver_options.h @@ -148,6 +148,9 @@ extern maplecl::Option march; extern maplecl::Option sysRoot; extern maplecl::Option specs; extern maplecl::Option folder; +#ifdef ENABLE_MAPLE_SAN +extern maplecl::Option sanitizer; +#endif /* ##################### DIGITAL Options ############################################################### */ diff --git a/src/mapleall/maple_driver/src/driver_options.cpp b/src/mapleall/maple_driver/src/driver_options.cpp index 5225496fe50cf9609c3f33d9275289d0df291fce..7fbe9967c8ae4dcdd7fee404275e4fe1e7f45c15 100644 --- a/src/mapleall/maple_driver/src/driver_options.cpp +++ b/src/mapleall/maple_driver/src/driver_options.cpp @@ -492,7 +492,11 @@ maplecl::Option specs({"-specs"}, maplecl::Option folder({"-p"}, " -p \tsave former folder when generating multiple output.\n", {driverCategory}); - +#ifdef ENABLE_MAPLE_SAN +maplecl::Option sanitizer({"-fsanitize"}, + " -fsanitize=address \tEnable AddressSanitizer.\n", + {driverCategory, meCategory}); +#endif /* ##################### DIGITAL Options ############################################################### */ maplecl::Option helpLevel({"--level"}, diff --git a/src/mapleall/maple_ir/include/mir_nodes.h b/src/mapleall/maple_ir/include/mir_nodes.h index 0cb7457b0034d6dc432af92bbe3c4d65671cf6bb..1df878e4f6debf5f38640c1a10fe4aee8935d9f4 100644 --- a/src/mapleall/maple_ir/include/mir_nodes.h +++ b/src/mapleall/maple_ir/include/mir_nodes.h @@ -2225,6 +2225,22 @@ class DassignoffNode : public UnaryStmtNode { UnaryStmtNode::SetOpnd(rhs, 0); } + StIdx GetStIdx() const { // returns the stIdx member + return stIdx; + } + + void SetStIdx(StIdx s) { // overwrites the stIdx member + stIdx = s; + } + + int32 GetOffset() const { // const like GetStIdx(): only reads the offset member + return offset; + } + + void SetOffset(int32 o) { // overwrites the offset member + offset = o; + } + public: StIdx stIdx; int32 offset = 0; diff --git a/src/mapleall/maple_me/CMakeLists.txt
b/src/mapleall/maple_me/CMakeLists.txt index 18cff85b11b2a00914d8d243c8350412c6c1fde1..5937205f214acfb34189a17a2830ea68dcfbd64c 100755 --- a/src/mapleall/maple_me/CMakeLists.txt +++ b/src/mapleall/maple_me/CMakeLists.txt @@ -30,6 +30,7 @@ set(inc_dir ${MAPLEALL_ROOT}/maple_be/include/ad ${MAPLEALL_ROOT}/maple_be/include/ad/target ${MAPLE_BUILD_OUTPUT}/common/target + ${MAPLEALL_ROOT}/maple_san/include ) set(src_libmplme diff --git a/src/mapleall/maple_me/include/me_option.h b/src/mapleall/maple_me/include/me_option.h index 8fd4235979046db9b02c6ea539ee572e81eb81c7..044f1e57baea9c78ea42357a8d5b4c86f3cc4639 100644 --- a/src/mapleall/maple_me/include/me_option.h +++ b/src/mapleall/maple_me/include/me_option.h @@ -191,6 +191,10 @@ class MeOption { static bool unifyRets; static bool dumpCfgOfPhases; static bool epreUseProfile; +#ifdef ENABLE_MAPLE_SAN + static uint32 asanFlags; + static std::string fsanitizeFlags; +#endif // safety check option begin static SafetyCheckMode npeCheckMode; static bool isNpeCheckAll; diff --git a/src/mapleall/maple_me/include/me_options.h b/src/mapleall/maple_me/include/me_options.h index 9b1d4d07ef7e1b2a8755574baf0da937402895fb..d9085b896037fd2feb325537d036732b9b155608 100644 --- a/src/mapleall/maple_me/include/me_options.h +++ b/src/mapleall/maple_me/include/me_options.h @@ -139,7 +139,9 @@ extern maplecl::Option unifyrets; extern maplecl::Option lfo; extern maplecl::Option dumpCfgOfPhases; extern maplecl::Option epreUseProfile; - +#ifdef ENABLE_MAPLE_SAN +extern maplecl::Option asanFlags; +#endif } #endif /* MAPLE_ME_INCLUDE_ME_OPTIONS_H */ diff --git a/src/mapleall/maple_me/src/me_option.cpp b/src/mapleall/maple_me/src/me_option.cpp index 32da08e0105bffa21776febec184906947bc3d2d..00fe8cd116cf61d90dedf257fb134884f1e8c630 100644 --- a/src/mapleall/maple_me/src/me_option.cpp +++ b/src/mapleall/maple_me/src/me_option.cpp @@ -137,6 +137,10 @@ bool MeOption::safeRegionMode = false; bool MeOption::unifyRets = false; bool 
MeOption::dumpCfgOfPhases = false; bool MeOption::epreUseProfile = true; +#ifdef ENABLE_MAPLE_SAN +uint32 MeOption::asanFlags = 0x0; +std::string MeOption::fsanitizeFlags; +#endif #if MIR_JAVA std::string MeOption::acquireFuncName = "Landroid/location/LocationManager;|requestLocationUpdates|"; std::string MeOption::releaseFuncName = "Landroid/location/LocationManager;|removeUpdates|"; @@ -424,6 +428,13 @@ bool MeOption::SolveOptions(bool isDebug) { maplecl::CopyIfEnabled(vecLoopLimit, opts::me::veclooplimit); maplecl::CopyIfEnabled(ivoptsLimit, opts::me::ivoptslimit); maplecl::CopyIfEnabled(unifyRets, opts::me::unifyrets); +#ifdef ENABLE_MAPLE_SAN + maplecl::CopyIfEnabled(asanFlags, opts::me::asanFlags); // bitmask given via --sanitizer=FLAGS + maplecl::CopyIfEnabled(fsanitizeFlags, opts::sanitizer); // value given via -fsanitize=... + if (fsanitizeFlags == "address") { + asanFlags |= 0x1; // add the ASan bit (0x01) without dropping bits set through --sanitizer=FLAGS + } +#endif #if MIR_JAVA maplecl::CopyIfEnabled(acquireFuncName, opts::me::acquireFunc); diff --git a/src/mapleall/maple_me/src/me_options.cpp b/src/mapleall/maple_me/src/me_options.cpp index 7f6b4be7a1c2cec139bd5ee3f809ef02957737f9..d932fb32d735f3e02126a5780b554df7ee7356df 100644 --- a/src/mapleall/maple_me/src/me_options.cpp +++ b/src/mapleall/maple_me/src/me_options.cpp @@ -721,4 +721,10 @@ maplecl::Option epreUseProfile({"--epreuseprofile"}, " --no-epreuseprofile \tDisable profile-guided epre phase\n", {meCategory}, maplecl::DisableWith("--no-epreuseprofile")); +#ifdef ENABLE_MAPLE_SAN +maplecl::Option asanFlags({"--sanitizer", "--san", "-san"}, + " --sanitizer=FLAGS \tEnable instrumenting sanitizer according to the given FLAGS\n" + " --sanitizer=0 \tDisable instrumenting sanitizer\n", + {meCategory}); +#endif } diff --git a/src/mapleall/maple_me/src/me_phase_manager.cpp b/src/mapleall/maple_me/src/me_phase_manager.cpp index 1ff8fe602cee92ceb97fd5696aa320bd8e3d9b14..d349a58591da6977e78ba3e2d3f8e01571de2306 100644 --- a/src/mapleall/maple_me/src/me_phase_manager.cpp +++ b/src/mapleall/maple_me/src/me_phase_manager.cpp @@ -285,4 +285,5 @@
MAPLE_TRANSFORM_PHASE_REGISTER_CANSKIP(MEDse, dse) MAPLE_TRANSFORM_PHASE_REGISTER_CANSKIP(MEABCOpt, abcopt) MAPLE_TRANSFORM_PHASE_REGISTER(MEEmit, meemit) MAPLE_TRANSFORM_PHASE_REGISTER_CANSKIP(ProfileGenEmit, profgenEmit); + } // namespace maple diff --git a/src/mapleall/maple_phase/include/phases.def b/src/mapleall/maple_phase/include/phases.def index 3ccf3a6e6c06d955a7882a164e23b8c4d258e763..cf5ef10ad2285c642c76e49a3050e5dbdd57a8fa 100644 --- a/src/mapleall/maple_phase/include/phases.def +++ b/src/mapleall/maple_phase/include/phases.def @@ -30,6 +30,10 @@ ADDMODULEPHASE("ConstantFold", CLANG && Options::O2 && Options::enableGInline) ADDMODULEPHASE("inline", CLANG && (Options::O2 && Options::useInline && Options::profileUse)) ADDMODULEPHASE("ipaclone", CLANG && Options::O2 && Options::enableIPAClone) ADDMODULEPHASE("ProfileGenPM", CLANG && Options::profileGen) +#ifdef ENABLE_MAPLE_SAN +// Address Sanitizer Phase (make it work before ME optimization) +ADDMAPLEPHASE("doModuleAsan", MeOption::asanFlags > 0); +#endif // me phase manager is also a phase itself. 
ADDMAPLEPHASE("meFuncPM", IsRunMe() && MeOption::optLevel > 0); ADDMODULEPHASE("clinit", JAVALANG) @@ -117,3 +121,8 @@ ADDMAPLEMEPHASE("bblayout", MeOption::optLevel >= 2 || JAVALANG) ADDMAPLEMEPHASE("meemit", MeOption::optLevel >= 2 || JAVALANG) ADDMAPLEMEPHASE("meverify", JAVALANG && MeOption::meVerify) #endif + +#ifdef SAN_PHASE +ADDMAPLEMEPHASE("doAsan", (MeOption::asanFlags & 0x01) > 0) +ADDMAPLEMEPHASE("doUbsanBound", (MeOption::asanFlags & 0x04) > 0) +#endif diff --git a/src/mapleall/maple_san/.gitignore b/src/mapleall/maple_san/.gitignore new file mode 100644 index 0000000000000000000000000000000000000000..7309491b23038bfa0ef32eee087268946ab0ecee --- /dev/null +++ b/src/mapleall/maple_san/.gitignore @@ -0,0 +1,4 @@ +C +bzip2 +gzip +test diff --git a/src/mapleall/maple_san/CMakeLists.txt b/src/mapleall/maple_san/CMakeLists.txt new file mode 100644 index 0000000000000000000000000000000000000000..89eb8801535b2f89634a7ac54171e3834c1c6281 --- /dev/null +++ b/src/mapleall/maple_san/CMakeLists.txt @@ -0,0 +1,42 @@ +set(inc_dirs + ${MAPLEALL_ROOT}/maple_san/include + ${MAPLEALL_ROOT}/maple_ipa/include + ${MAPLEALL_ROOT}/maple_ir/include + ${MAPLEALL_ROOT}/maple_me/include + ${MAPLEALL_ROOT}/maple_phase/include + ${MAPLEALL_ROOT}/maple_util/include + ${MAPLEALL_ROOT}/maple_driver/include + ${MAPLEALL_ROOT}/mempool/include + ${MAPLEALL_ROOT}/mpl2mpl/include + ${MAPLEALL_ROOT}/maple_be/include/cg + ${MAPLEALL_ROOT}/maple_ipa/include/old + ${MAPLEALL_ROOT}/maple_pgo/include + ${THIRD_PARTY_ROOT}/bounds_checking_function/include +) + +set(src_libmplsan + src/asan_phases.cpp + src/asan_mapping.cpp + src/asan_stackvar.cpp + src/asan_module.cpp + src/asan_function.cpp + src/san_common.cpp + src/ubsan_phases.cpp + src/ubsan_bounds.cpp + src/san_phase_manager.cpp +) + +add_library(libmplsan SHARED ${src_libmplsan}) + +if(ENABLE_MAPLE_SAN) + set_target_properties(libmplsan PROPERTIES + COMPILE_FLAGS "-DENABLE_MAPLE_SAN=1" + INCLUDE_DIRECTORIES "${inc_dirs}" + LINK_LIBRARIES 
"" + ARCHIVE_OUTPUT_DIRECTORY ${MAPLE_BUILD_OUTPUT}/lib/${HOST_ARCH} + ) +endif(ENABLE_MAPLE_SAN) +# OPTION(ASAN_ENABLE_RBTREE "Use RBTree for shadow memory management in ASan" OFF) +# if(ASAN_ENABLE_RBTREE) +# target_compile_definitions(mplsan PUBLIC ENABLERBTREE) +# endif(ASAN_ENABLE_RBTREE) diff --git a/src/mapleall/maple_san/include/asan_function.h b/src/mapleall/maple_san/include/asan_function.h new file mode 100644 index 0000000000000000000000000000000000000000..2efd880188577b465c49f9ff4492415f61c40492 --- /dev/null +++ b/src/mapleall/maple_san/include/asan_function.h @@ -0,0 +1,126 @@ +// +// Created by wchenbt on 4/4/2021. +// +#ifdef ENABLE_MAPLE_SAN + +#ifndef MAPLE_SAN_ASAN_FUNCTION_H +#define MAPLE_SAN_ASAN_FUNCTION_H + +#include "asan_mapping.h" +#include "me_cfg.h" +#include "me_function.h" +#include "me_ssa.h" +#include "mir_module.h" +#include "san_common.h" + +namespace maple { + +struct MemoryAccess { + StmtNode *stmtNode; + bool isWrite; + uint64_t typeSize; + size_t alignment; + BaseNode *ptrOperand; +}; + +// Accesses sizes are powers of two: 1, 2, 4, 8, 16. +class AddressSanitizer { + public: + AddressSanitizer(MIRModule &module, PreAnalysis *symbolInteresting) + : module(&module), Mapping(getShadowMapping()), preAnalysis(symbolInteresting) { + LongSize = kSizeOfPtr * 8; // kSizeOfPtr (4 or 8) times 8 + IntPtrPrim = kSizeOfPtr == sizeof(int32) ?
PTY_i32 : PTY_i64; // both operands are byte counts, so PTY_i32 is picked when kSizeOfPtr is 4 + IntPtrTy = GlobalTables::GetTypeTable().GetPrimType(IntPtrPrim); + } + + bool instrumentFunction(MeFunction &F); + + void instrumentAddress(StmtNode *InsertBefore, BaseNode *Addr, uint64_t TypeSize, bool IsWrite, + BaseNode *SizeArgument); + + void instrumentUnusualSizeOrAlignment(StmtNode *InsertBefore, BaseNode *Addr, uint64_t TypeSize, + bool IsWrite); + + private: + friend class FunctionStackPoisoner; + + void instrumentMop(StmtNode *I, std::vector &memoryAccess); + + void initializeCallbacks(const MIRModule &mirModule); + + bool isInterestingSymbol(const MIRSymbol &symbol); + + bool isInterestingAlloca(const UnaryNode &unaryNode); + + void instrumentMemIntrinsic(IntrinsiccallNode *stmtNode); + + void maybeInsertDynamicShadowAtFunctionEntry(const MeFunction &F); + + BaseNode *memToShadow(BaseNode *Shadow, MIRBuilder &mirBuilder); + + /// Returns the interesting memory accesses found for stmtNode as a vector. + /// NOTE(review): older wording said "otherwise return nullptr"; presumably an empty vector now means none - confirm. + std::vector isInterestingMemoryAccess(StmtNode *stmtNode); + + MemoryAccess getIassignMemoryAccess(IassignNode &iassign); + + MemoryAccess getIassignoffMemoryAccess(IassignoffNode &iassignoff); + + MemoryAccess getIreadMemoryAccess(IreadNode &iread, StmtNode *stmtNode); + + StmtNode *splitIfAndElseBlock(Opcode op, StmtNode *elsePart, const BinaryNode *cmpStmt); + + CallNode *generateCrashCode(MIRSymbol *Addr, bool IsWrite, size_t AccessSizeIndex, BaseNode *SizeArgument); + + BinaryNode *createSlowPathCmp(StmtNode *InsBefore, BaseNode *AddrLong, BaseNode *ShadowValue, uint64_t TypeSize); + + void SanrazorProcess(MeFunction &mefunc, std::set &userchecks, + std::map> &brgoto_map, std::map &stmt_to_bbID, + std::map &stmt_id_to_stmt, std::vector &stmt_id_list, int check_env); + + struct FunctionStateRAII { // asserts clean per-function state on entry, resets it on destruction + AddressSanitizer *Phase; + + explicit FunctionStateRAII(AddressSanitizer *Phase) : Phase(Phase) { + assert(Phase->ProcessedSymbols.empty() && "last pass forgot to clear cache"); +
assert(!Phase->LocalDynamicShadow); + } + + ~FunctionStateRAII() { + Phase->LocalDynamicShadow = nullptr; + Phase->ProcessedSymbols.clear(); + Phase->preAnalysis->usedInAddrof.clear(); + } + }; + + size_t LongSize; + MeFunction *func = nullptr; + MIRModule *module; + PrimType IntPtrPrim; + MIRType *IntPtrTy; + ShadowMapping Mapping; + MIRFunction *AsanHandleNoReturnFunc = nullptr; + + // This array is indexed by AccessIsWrite and log2(AccessSize). + MIRFunction *AsanErrorCallback[2][kNumberOfAccessSizes] = {}; + // This array is indexed by AccessIsWrite + MIRFunction *AsanErrorCallbackSized[2] = {}; + + MIRFunction *AsanMemmove = nullptr, *AsanMemcpy = nullptr, *AsanMemset = nullptr; + MIRFunction *AsanRBTSafetyCheck = nullptr, *AsanRBTStackInsert = nullptr, *AsanRBTStackDelete = nullptr, + *AsanRBTPoisonRegion = nullptr, *AsanRBTUnpoisonRegion = nullptr; + + MIRFunction *__san_cov_flush = nullptr; + + BaseNode *LocalDynamicShadow = nullptr; + + std::map ProcessedSymbols; + std::map ProcessedAllocas; + + PreAnalysis *preAnalysis; +}; + +} // namespace maple +#endif // MAPLE_SAN_ASAN_FUNCTION_H + +#endif \ No newline at end of file diff --git a/src/mapleall/maple_san/include/asan_interfaces.h b/src/mapleall/maple_san/include/asan_interfaces.h new file mode 100644 index 0000000000000000000000000000000000000000..de336098c70a9c029c20da611bffc97862a323ff --- /dev/null +++ b/src/mapleall/maple_san/include/asan_interfaces.h @@ -0,0 +1,26 @@ +// +// Created by wchenbt on 2021/3/28.
+// +#ifdef ENABLE_MAPLE_SAN + +#ifndef MAPLE_SAN_INCLUDE_ASAN_MESSAGES_H +#define MAPLE_SAN_INCLUDE_ASAN_MESSAGES_H + +namespace maple { + +const char *const kAsanModuleCtorName = "asan.module_ctor"; +const char *const kAsanModuleDtorName = "asan.module_dtor"; +const char *const kAsanInitName = "__asan_init"; +const char *const kAsanHandleNoReturnName = "__asan_handle_no_return"; +const char *const kAsanRegisterGlobalsName = "__asan_register_globals"; +const char *const kAsanUnregisterGlobalsName = "__asan_unregister_globals"; +const char *const kAsanSetShadowPrefix = "__asan_set_shadow_"; +const char *const kAsanReportErrorTemplate = "__asan_report_"; +const char *const kAsanShadowMemoryDynamicAddress = "__asan_shadow_memory_dynamic_address"; +const char *const kAsanAllocaPoison = "__asan_alloca_poison"; +const char *const kAsanAllocasUnpoison = "__asan_allocas_unpoison"; + +} // namespace maple +#endif // MAPLE_SAN_INCLUDE__ASAN_MESSAGES_H + +#endif \ No newline at end of file diff --git a/src/mapleall/maple_san/include/asan_mapping.h b/src/mapleall/maple_san/include/asan_mapping.h new file mode 100644 index 0000000000000000000000000000000000000000..2fc761df84709c841cc2b64fd54dd7e22dbc6aac --- /dev/null +++ b/src/mapleall/maple_san/include/asan_mapping.h @@ -0,0 +1,48 @@ +// +// Created by wchenbt on 4/1/21. +// +#ifdef ENABLE_MAPLE_SAN + +#ifndef MAPLE_SAN_INCLUDE_ASAN_MAPPING_H +#define MAPLE_SAN_INCLUDE_ASAN_MAPPING_H + +#include + +#include "san_common.h" +namespace maple { +const uint64_t kDefaultShadowScale = 3; +const uint64_t kDefaultShadowOffset32 = 1ULL << 29; // 0x20000000 +const uint64_t kDefaultShadowOffset64 = 1ULL << 44; +const uint64_t kDynamicShadowSentinel = std::numeric_limits::max(); +const uint64_t kSmallX86_64ShadowOffsetBase = 0x7FFFFFFF; // < 2G. 
+const uint64_t kSmallX86_64ShadowOffsetAlignMask = ~0xFFFULL; +const uint64_t kAArch64_ShadowOffset64 = 1ULL << 36; + +struct ShadowMapping { + int Scale; + uint64_t Offset; + bool OrShadowOffset; +}; + +inline ShadowMapping getShadowMapping() { + ShadowMapping Mapping; + Mapping.Scale = kDefaultShadowScale; +#if TARGAARCH64 + Mapping.Offset = kAArch64_ShadowOffset64; + Mapping.OrShadowOffset = false; +#elif TARGX86_64 + Mapping.Offset = (kSmallX86_64ShadowOffsetBase & (kSmallX86_64ShadowOffsetAlignMask << Mapping.Scale)); + Mapping.OrShadowOffset = !(Mapping.Offset & (Mapping.Offset - 1)) && Mapping.Offset != kDynamicShadowSentinel; +#elif TARGX86 || TARGARM32 || TARGVM + Mapping.Offset = kDefaultShadowOffset32; + Mapping.OrShadowOffset = !(Mapping.Offset & (Mapping.Offset - 1)) && Mapping.Offset != kDynamicShadowSentinel; +#else + Mapping.Offset = kDefaultShadowOffset64; + Mapping.OrShadowOffset = !(Mapping.Offset & (Mapping.Offset - 1)) && Mapping.Offset != kDynamicShadowSentinel; +#endif + return Mapping; +} +} // namespace maple +#endif // MAPLE_SAN_INCLUDE_ASAN_MAPPING_H + +#endif \ No newline at end of file diff --git a/src/mapleall/maple_san/include/asan_module.h b/src/mapleall/maple_san/include/asan_module.h new file mode 100644 index 0000000000000000000000000000000000000000..45bd73626dd31071530a280f09ae321e84aac581 --- /dev/null +++ b/src/mapleall/maple_san/include/asan_module.h @@ -0,0 +1,65 @@ +// +// Created by wchenbt on 4/4/2021. +// +#ifdef ENABLE_MAPLE_SAN + +#ifndef MAPLE_SAN_ASAN_MODULE_H +#define MAPLE_SAN_ASAN_MODULE_H + +#include "asan_mapping.h" +#include "asan_stackvar.h" +#include "mir_function.h" +#include "mir_module.h" +#include "san_common.h" + +namespace maple { +class ModuleAddressSanitizer { + public: + explicit ModuleAddressSanitizer(MIRModule &module) : module(&module), Mapping(getShadowMapping()) { + // kSizeOfPtr and sizeof(int32) are both byte counts, so PTY_i32 is picked when kSizeOfPtr is 4. + IntPtrPrim = kSizeOfPtr == sizeof(int32) ?
PTY_i32 : PTY_i64; + IntPtrTy = GlobalTables::GetTypeTable().GetPrimType(IntPtrPrim); + GetGlobalSymbolUsage(); + } + + bool instrumentModule(); + + private: + void initializeCallbacks(); + + void GetGlobalSymbolUsage(); + + bool InstrumentGlobals(BlockNode *ctorToBeInserted); + bool ShouldInstrumentGlobal(MIRSymbol *var); + + void InstrumentGlobalsWithMetadataArray(BlockNode *ctorToBeInserted, const std::vector ExtendedGlobals, + std::vector MetadataInitializers); + + BlockNode *CreateCtorAndInitFunctions(const std::string CtorName, const std::string InitName, + const MapleVector InitArgs); + + BlockNode *CreateModuleDtor(); + + size_t MinRedzoneSizeForGlobal() const { // larger of 32 and (1 << Mapping.Scale) + return std::max(32U, 1U << Mapping.Scale); + } + + MIRModule *module; + + PrimType IntPtrPrim; + MIRType *IntPtrTy; + ShadowMapping Mapping; + + MIRFunction *AsanRegisterGlobals = nullptr, *AsanUnregisterGlobals = nullptr; + + MIRFunction *AsanCtorFunction = nullptr; + MIRFunction *AsanDtorFunction = nullptr; + + std::map, std::set> symbolUsedInStmt; + std::map, std::set> symbolUsedInInit; +}; +} // namespace maple + +#endif // MAPLE_SAN_ASAN_MODULE_H + +#endif \ No newline at end of file diff --git a/src/mapleall/maple_san/include/asan_phases.h b/src/mapleall/maple_san/include/asan_phases.h new file mode 100644 index 0000000000000000000000000000000000000000..daa8dae15a5790f2e6ddadbb7f8df7d608cb90b4 --- /dev/null +++ b/src/mapleall/maple_san/include/asan_phases.h @@ -0,0 +1,28 @@ +#ifdef ENABLE_MAPLE_SAN + +#ifndef MAPLE_SAN_INCLUDE_ASAN_PHASES_H +#define MAPLE_SAN_INCLUDE_ASAN_PHASES_H + +#include +#include "maple_phase.h" +#include "san_common.h" +#include "maple_phase.h" +#include "maple_phase_manager.h" +#include "me_phase_manager.h" + + +namespace maple { + MAPLE_FUNC_PHASE_DECLARE_BEGIN(MEDoVarCheck, MeFunction) + PreAnalysis* GetResult(); + private: + void GetAnalysisDependence(maple::AnalysisDep &aDep) const override; + PreAnalysis* result = nullptr; + MAPLE_FUNC_PHASE_DECLARE_END + +
MAPLE_FUNC_PHASE_DECLARE(MEDoAsan, MeFunction) + +} // namespace maple + +#endif // MAPLE_SAN_INCLUDE_SAN_PHASES_H + +#endif \ No newline at end of file diff --git a/src/mapleall/maple_san/include/asan_stackvar.h b/src/mapleall/maple_san/include/asan_stackvar.h new file mode 100644 index 0000000000000000000000000000000000000000..2b7c726fd13d88f98ffad26d16bf57ea1c13cf6d --- /dev/null +++ b/src/mapleall/maple_san/include/asan_stackvar.h @@ -0,0 +1,93 @@ +// +// Created by wchenbt on 3/4/2021. +// +#ifdef ENABLE_MAPLE_SAN + +#ifndef MAPLE_SAN_ASAN_STACKVAR_H +#define MAPLE_SAN_ASAN_STACKVAR_H + +#include + +#include "asan_function.h" +#include "asan_interfaces.h" +#include "me_phase_manager.h" +#include "module_phase_manager.h" +#include "san_common.h" + +namespace maple { + +class FunctionStackPoisoner { + public: + FunctionStackPoisoner(MeFunction &function, AddressSanitizer &asan); + + bool runOnFunction(); + + void processStackVariable(); + + void unpoisonDynamicAllocas(); + + void initializeCallbacks(const MIRModule &M); + + void createDynamicAllocasInitStorage(); + + bool isInFirstBlock(StmtNode *stmtNode); + + BaseNode *GetTransformedNode(MIRSymbol *oldVar, MIRSymbol *newVar, BaseNode *baseNode); + + void replaceAllUsesWith(MIRSymbol *oldVar, MIRSymbol *newVar); + + void handleDynamicAllocaCall(ASanDynaVariableDescription *AI); + + MIRSymbol *createAllocaForLayout(StmtNode *insBefore, MIRBuilder *mirBuilder, const ASanStackFrameLayout &L); + + void unpoisonDynamicAllocasBeforeInst(StmtNode *InstBefore); + + void copyToShadow(const std::vector ShadowMask, const std::vector ShadowBytes, MIRBuilder *mirBuilder, + BaseNode *ShadowBase, StmtNode *InsBefore); + + void copyToShadow(const std::vector ShadowMask, const std::vector ShadowBytes, size_t Begin, size_t End, + MIRBuilder *mirBuilder, BaseNode *ShadowBase, StmtNode *InsBefore); + + void copyToShadowInline(const std::vector ShadowMask, const std::vector ShadowBytes, size_t Begin, size_t End, + MIRBuilder 
*mirBuilder, BaseNode *ShadowBase, StmtNode *InsBefore); + + bool isFuncCallArg(const MIRSymbol *const symbolPtr) const; + bool isFuncCallArg(const std::string symbolName) const; + + std::set GetStackVarReferedByCallassigned(); + + AddressSanitizer &ASan; + + MeFunction *meFunction; + MIRFunction *mirFunction; + + MIRModule *module; + MIRType *IntptrTy; + MIRType *IntptrPtrTy; + ShadowMapping Mapping; + + unsigned StackAlignment; + MIRSymbol *DynamicAllocaLayout = nullptr; + + std::vector RetVec; + std::vector stackVariableDesc; + std::vector dynamicAllocaDesc; + std::set callArgSymbols; + std::set callArgSymbolNames; + + MIRFunction *AsanSetShadowFunc[0x100] = {}; + MIRFunction *AsanAllocaPoisonFunc, *AsanAllocasUnpoisonFunc; + + bool HasNonEmptyInlineAsm = false; + bool HasReturnsTwiceCall = false; + + std::map isUsedInAlloca; + private: + void collectLocalVariablesWithoutAlloca(); + void collectLocalVariablesWithAlloca(); + void collectDescFromUnaryStmtNode(UnaryStmtNode &assignNode); +}; +} // namespace maple +#endif // MAPLE_SAN_ASAN_STACKVAR_H + +#endif \ No newline at end of file diff --git a/src/mapleall/maple_san/include/san_common.h b/src/mapleall/maple_san/include/san_common.h new file mode 100644 index 0000000000000000000000000000000000000000..425a8a08719f0af1e3467ebcef0edcbc7fb96342 --- /dev/null +++ b/src/mapleall/maple_san/include/san_common.h @@ -0,0 +1,165 @@ +#ifdef ENABLE_MAPLE_SAN + +#ifndef MAPLE_SAN_INCLUDE_SAN_COMMON_H +#define MAPLE_SAN_INCLUDE_SAN_COMMON_H +#include "me_function.h" +#include "me_phase_manager.h" +#include "mir_builder.h" +#include "mir_function.h" +#include "mir_module.h" +#include "mir_nodes.h" +#include "types_def.h" + +namespace maple { + +#if TARGX86_64 || TARGAARCH64 +#define LOWERED_PTR_TYPE PTY_a64 +constexpr uint8 kSizeOfPtr = 8; +#elif TARGX86 || TARGARM32 || TARGVM +#define LOWERED_PTR_TYPE PTY_a32 +constexpr uint8 kSizeOfPtr = 4; +#else +#error "Unsupported target" +#endif + +struct ASanStackVariableDescription { 
+ std::string Name; // Name of the variable that will be displayed by asan + size_t Size; // Size of the variable in bytes. + size_t LifetimeSize; // Size in bytes to use for lifetime analysis check. + size_t Alignment; // Alignment of the variable (power of 2). + MIRSymbol *Symbol; // The actual AllocaInst. + StmtNode *AllocaInst; + size_t Offset; // Offset from the beginning of the frame; + unsigned Line; // Line number. + + bool operator<(const ASanStackVariableDescription &rhs) const { + return this->Line > rhs.Line; + } +}; + +struct ASanDynaVariableDescription { + std::string Name; // Name of the variable that will be displayed by asan + BaseNode *Size; // Size of the variable in bytes. + StmtNode *AllocaInst; + size_t Offset; // Offset from the beginning of the frame; + unsigned Line; // Line number. + + bool operator<(const ASanDynaVariableDescription &rhs) const { + return this->Line > rhs.Line; + } +}; + +// Output data struct for ComputeASanStackFrameLayout. +struct ASanStackFrameLayout { + size_t Granularity; // Shadow granularity. + size_t FrameAlignment; // Alignment for the entire frame. + size_t FrameSize; // Size of the frame in bytes. +}; + +// Struct to contain the information for performing set check +// For elimination of san check +struct set_check { + std::vector opcode; // 1. opcode enum Opcode : uint8 + std::vector register_terminal; // 2. register_terminal -> cannot further expand + std::stack register_live; // 3. register_live -> for further expansion + std::vector var_terminal; // 4. var_terminal -> cannot further expand + std::stack var_live; // 5. var_live -> for further expansion + std::vector const_int64; // 6. const, we only track int64 or kConstInt + std::vector const_str; // 7. const str ,we only store the index + std::vector type_num; // 8. 
Type, but we will just track the fieldID for simplicity iread->GetFieldID() +}; + +struct san_struct { + int stmtID; + int tot_ctr; + int l_ctr; + int r_ctr; +}; + +class PreAnalysis : public AnalysisResult { + public: + PreAnalysis(MemPool &memPoolParam) : AnalysisResult(&memPoolParam){}; + + ~PreAnalysis() override = default; + + std::vector usedInAddrof; +}; + +static const size_t kMinAlignment = 16; +static const unsigned kAllocaRzSize = 32; +static const size_t kNumberOfAccessSizes = 5; + +static const uintptr_t kCurrentStackFrameMagic = 0x41B58AB3; +static const uintptr_t kRetiredStackFrameMagic = 0x45E0360E; +static const size_t kMinStackMallocSize = 1 << 6; // 64B +static const size_t kMaxStackMallocSize = 1 << 16; // 64K +static const int kMaxAsanStackMallocSizeClass = 10; + +// These magic constants should be the same as in +// in asan_internal.h from ASan runtime in compiler-rt. +static const int kAsanStackLeftRedzoneMagic = 0xf1; +static const int kAsanStackMidRedzoneMagic = 0xf2; +static const int kAsanStackRightRedzoneMagic = 0xf3; +static const int kAsanStackUseAfterScopeMagic = 0xf8; + +bool isTypeSized(MIRType *type); + +int computeRedZoneField(MIRType *type); + +size_t TypeSizeToSizeIndex(uint32_t TypeSize); + +std::vector GetGlobalVaribles(const MIRModule &mirModule); + +void appendToGlobalCtors(const MIRModule &mirModule, const MIRFunction *func); + +void appendToGlobalDtors(const MIRModule &mirModule, const MIRFunction *func); + +MIRFunction *getOrInsertFunction(MIRBuilder *mirBuilder, const char *name, MIRType *retType, + std::vector argTypes); + +std::vector GetShadowBytes(const std::vector &Vars, + const ASanStackFrameLayout &Layout); + +MIRAddrofConst *createSourceLocConst(MIRModule &mirModule, MIRSymbol *Var, PrimType primType); + +MIRAddrofConst *createAddrofConst(const MIRModule &mirModule, const MIRSymbol *mirSymbol, PrimType primType); + +MIRStrConst *createStringConst(const MIRModule &mirModule, const std::basic_string& Str, PrimType 
primType); + +std::string ComputeASanStackFrameDescription(const std::vector &vars); + +std::vector GetShadowBytesAfterScope(const std::vector &Vars, + const ASanStackFrameLayout &Layout); + +MIRSymbol *getOrCreateSymbol(MIRBuilder *mirBuilder, const TyIdx tyIdx, const std::string &name, MIRSymKind mClass, + MIRStorageClass sClass, MIRFunction *func, uint8 scpID); + +ASanStackFrameLayout ComputeASanStackFrameLayout(std::vector &Vars, size_t Granularity, + size_t MinHeaderSize); +// Start of Sanrazor +int SANRAZOR_MODE(); +CallNode *retCallCOV(const MeFunction &func, int bb_id, int stmt_id, int br_true, int type_of_check); +void recursion(BaseNode *stmt, std::vector &stmt_reg); +bool isReg_redefined(BaseNode *stmt, std::vector &stmt_reg); +bool isVar_redefined(BaseNode *stmt, std::vector &stmt_reg); +void dep_expansion(BaseNode *stmt, set_check &dep, std::map> reg_to_stmt, + std::map> var_to_stmt, const MeFunction &func); +void print_dep(set_check dep); +template +void print_stack(std::stack &st); +template +bool compareVectors(const std::vector& a, const std::vector& b); +int getIndex(std::vector v, StmtNode *K); +StmtNode *retLatest_Regassignment(StmtNode *stmt, int32 register_number); +StmtNode *retLatest_Varassignment(StmtNode *stmt, uint32 var_number); +set_check commit(set_check old, set_check latest); +void gen_register_dep(StmtNode *stmt, set_check &br_tmp, std::map> reg_to_stmt, + std::map> var_to_stmt, const MeFunction& func); +bool sat_check(const set_check& a, const set_check& b); +std::map gen_dynmatch(std::string file_name); +bool dynamic_sat(const san_struct& a, const san_struct& b, bool SCSC); + +} // end namespace maple +#endif // MAPLE_SAN_INCLUDE_SAN_COMMON_H + +#endif \ No newline at end of file diff --git a/src/mapleall/maple_san/include/san_phase_manager.h b/src/mapleall/maple_san/include/san_phase_manager.h new file mode 100644 index 0000000000000000000000000000000000000000..1ac31e75974e578d57e174a3fbb43b3d44b5ba82 --- /dev/null +++ 
b/src/mapleall/maple_san/include/san_phase_manager.h @@ -0,0 +1,33 @@ +#ifdef ENABLE_MAPLE_SAN + +#ifndef MAPLE_SAN_INCLUDE_SAN_PHASES_MANAGER_H +#define MAPLE_SAN_INCLUDE_SAN_PHASES_MANAGER_H + +#include "asan_module.h" +#include "maple_phase.h" +#include "maple_phase_manager.h" +#include "mempool.h" + + +namespace maple { + +class MEModuleDoAsan : public FunctionPM { +public: + explicit MEModuleDoAsan(MemPool *memPool) : FunctionPM(memPool, &id) {} + PHASECONSTRUCTOR(MEModuleDoAsan); + bool PhaseRun(MIRModule &m) override; + std::string PhaseName() const override; + ~MEModuleDoAsan() override {} +private: + bool FuncLevelRun(MeFunction &meFunc, AnalysisDataManager &serialADM); + void GetAnalysisDependence(AnalysisDep &aDep) const override; + void DoPhasesPopulate(const MIRModule &mirModule); + + std::string meInput = ""; +}; + +} // namespace maple + +#endif + +#endif \ No newline at end of file diff --git a/src/mapleall/maple_san/include/ubsan_bounds.h b/src/mapleall/maple_san/include/ubsan_bounds.h new file mode 100644 index 0000000000000000000000000000000000000000..d5f9506a2d46457032ee982ff2b94b52bf9eaa78 --- /dev/null +++ b/src/mapleall/maple_san/include/ubsan_bounds.h @@ -0,0 +1,59 @@ +// +// Created by wchenbt on 9/5/2021. 
+// +#ifdef ENABLE_MAPLE_SAN + +#ifndef MAPLE_SAN_UBSAN_BOUNDS_H +#define MAPLE_SAN_UBSAN_BOUNDS_H +#include "me_function.h" +#include "me_phase_manager.h" +#include "mir_builder.h" +#include "mir_module.h" + +namespace maple { + +class ArrayInfo { + public: + StmtNode *usedStmt; + MIRArrayType *arrayType; + size_t neededSize; + std::vector elemType; + std::vector offset; + std::vector dimensions; + std::vector> checks; + + ArrayInfo(StmtNode *usedStmt, MIRArrayType *arrayType, ArrayNode *arrayNode); + size_t GetElementSize(); + void SetNeededSize(size_t neededSize); + std::string GetArrayTypeName(size_t dim); +}; + +class BoundCheck { + public: + BoundCheck(MeFunction *func); + bool addBoundsChecking(); + void initializeCallbacks(); + void insertBoundsCheck(ArrayInfo *arrayInfo, size_t dim); + void getBoundsCheckCond(ArrayInfo *arrayInfo, BlockNode *body, size_t dim); + + std::vector getArrayInfo(StmtNode *stmtNode); + + MeFunction *func; + MIRBuilder *mirBuilder; + MIRModule *mirModule; + MIRStructType *sourceLocType; + MIRStructType *typeDescriptor; + MIRStructType *outofBoundsData; + MIRFunction *ubsanHandler; + + MIRSymbol *symbol_1; + MIRSymbol *symbol_2; + MIRSymbol *outofBound; + MIRSymbol *sourceLoc; + MIRSymbol *arrayType; + MIRSymbol *indexType; +}; +} // namespace maple +#endif // MAPLE_SAN_UBSAN_BOUNDS_H + +#endif \ No newline at end of file diff --git a/src/mapleall/maple_san/include/ubsan_phases.h b/src/mapleall/maple_san/include/ubsan_phases.h new file mode 100644 index 0000000000000000000000000000000000000000..055c07017fc1eb7d2516ca1d41fe505238261445 --- /dev/null +++ b/src/mapleall/maple_san/include/ubsan_phases.h @@ -0,0 +1,17 @@ +#ifdef ENABLE_MAPLE_SAN + +#ifndef MAPLE_SAN_INCLUDE_UBSAN_PHASES_H +#define MAPLE_SAN_INCLUDE_UBSAN_PHASES_H + +#include +#include "maple_phase.h" +#include "san_common.h" + + +namespace maple { + MAPLE_FUNC_PHASE_DECLARE(MEDoUbsanBound, MeFunction) +} // namespace maple + +#endif // MAPLE_SAN_INCLUDE_UBSAN_PHASES_H + 
+#endif \ No newline at end of file diff --git a/src/mapleall/maple_san/src/asan_function.cpp b/src/mapleall/maple_san/src/asan_function.cpp new file mode 100644 index 0000000000000000000000000000000000000000..ffbbf0be66c15e7abb522dd668a01f6385f47a8b --- /dev/null +++ b/src/mapleall/maple_san/src/asan_function.cpp @@ -0,0 +1,979 @@ +#ifdef ENABLE_MAPLE_SAN + +#include "asan_function.h" + +#include + +#include +#include + +#include "asan_interfaces.h" +#include "asan_stackvar.h" +#include "me_cfg.h" +#include "me_function.h" +#include "mir_builder.h" +#include "mpl_logging.h" +#include "opcode_info.h" +#include "san_common.h" + +namespace maple { +bool isBlacklist(int k) { + return (k == 120 || k == 125); +} + +void doInstrumentAddress(AddressSanitizer *Phase, StmtNode *InsertBefore, BaseNode *Addr, + size_t Alignment, size_t Granularity, uint64_t TypeSize, bool IsWrite) { + // Instrument a 1-, 2-, 4-, 8-, or 16- byte access with one check + // if the data is properly aligned. + if ((TypeSize == 8 || TypeSize == 16 || TypeSize == 32 || TypeSize == 64 || TypeSize == 128) && + (Alignment >= Granularity || Alignment == 0 || Alignment >= TypeSize / 8)) { + Phase->instrumentAddress(InsertBefore, Addr, TypeSize, IsWrite, nullptr); + } else { + Phase->instrumentUnusualSizeOrAlignment(InsertBefore, Addr, TypeSize, IsWrite); + } +} + +void dumpFunc(MeFunction &mefunc) { + StmtNodes &stmtNodes = mefunc.GetMirFunc()->GetBody()->GetStmtNodes(); + for (StmtNode &stmt : stmtNodes) { + stmt.Dump(0); + } +} + +bool AddressSanitizer::instrumentFunction(MeFunction &mefunc) { + MIRBuilder *builder = mefunc.GetMIRModule().GetMIRBuilder(); + this->func = &mefunc; + if (mefunc.GetMirFunc()->GetAttr(FUNCATTR_extern)) { + return false; + } + if (mefunc.GetName().find("__asan_") == 0 || mefunc.GetName().find("__san_cov_") == 0) { + return false; + } + + bool functionModified = false; + + LogInfo::MapleLogger() << "ASAN instrumenting: " << mefunc.GetName() << "\n"; + + 
initializeCallbacks(mefunc.GetMIRModule()); + + // When exit this function, the destructor will clean relatives + FunctionStateRAII cleanupObj(this); + + maybeInsertDynamicShadowAtFunctionEntry(mefunc); + + std::vector toInstrument; + std::vector noReturnCalls; + + // Definition for sanrazor + + // Destination map back to initiate stmt + std::map> brgoto_map; + // Map the stmt by using label ID to the basic block ID, for verification in the coverage + std::map stmt_to_bbID; + + // Distinguishing user checks or sanitzer checks + std::set userchecks; + + std::map stmt_id_to_stmt; + + for (StmtNode &stmt : mefunc.GetMirFunc()->GetBody()->GetStmtNodes()) { + toInstrument.push_back(&stmt); + if (CallNode *callNode = dynamic_cast(&stmt)) { + MIRFunction *calleeFunc = GlobalTables::GetFunctionTable().GetFunctionFromPuidx(callNode->GetPUIdx()); + if (calleeFunc->NeverReturns() || calleeFunc->GetName() == "exit") { + noReturnCalls.push_back(callNode); + } + } + if (stmt.GetOpCode() == OP_brtrue || stmt.GetOpCode() == OP_brfalse) { + userchecks.insert(&stmt); + } + } + + int numInstrumented = 0; + + for (auto stmt : toInstrument) { + std::vector memAccVec = isInterestingMemoryAccess(stmt); + if (memAccVec.size() > 0) { + instrumentMop(stmt, memAccVec); + } else { + instrumentMemIntrinsic(dynamic_cast(stmt)); + } + numInstrumented++; + } + + FunctionStackPoisoner fsp(mefunc, *this); + bool changedStack = fsp.runOnFunction(); + + for (auto stmt : noReturnCalls) { + MapleVector args(builder->GetCurrentFuncCodeMpAllocator()->Adapter()); + CallNode *callNode = builder->CreateStmtCall(AsanHandleNoReturnFunc->GetPuidx(), args); + callNode->InsertAfterThis(*stmt); + } + + int check_env = SANRAZOR_MODE(); + std::vector stmt_id_list; + bool doSanrazor = (check_env > 0) && (numInstrumented > 0 || changedStack || !noReturnCalls.empty()); + if (doSanrazor) { + functionModified = true; + SanrazorProcess(mefunc, userchecks, brgoto_map, stmt_to_bbID, stmt_id_to_stmt, stmt_id_list, 
check_env); + } + // dump IRs of each block + // dumpFunc(mefunc); + LogInfo::MapleLogger() << "ASAN done instrumenting: " << functionModified << " " << mefunc.GetName() << "\n"; + + return functionModified; +} + +void AddressSanitizer::SanrazorProcess(MeFunction &mefunc, std::set &userchecks, + std::map> &brgoto_map, + std::map &stmt_to_bbID, + std::map &stmt_id_to_stmt, std::vector &stmt_id_list, + int check_env) { + MIRBuilder *builder = mefunc.GetMIRModule().GetMIRBuilder(); + LogInfo::MapleLogger() << "****************SANRAZOR instrumenting****************" + << "\n"; + MIRType *voidType = GlobalTables::GetTypeTable().GetVoid(); + // type 0 is user check, type 1 is sanitzer check + int type_of_check = 0; + // if br_true set to 1, else set to 0 + int br_true = 0; + // we check one step, than instrument_flag set to false + bool instrument_flag = false; + // info to plugin the shared lib + int bb_id = 0; + int stmt_id = 0; + MeCFG *cfg = mefunc.GetCfg(); + + std::set reg_order; + std::map> reg_to_stmt; + + std::set var_order; + std::map> var_to_stmt; + + std::vector san_set_check; + std::vector san_set_check_ID; + + std::vector user_set_check; + std::vector user_set_check_ID; + + std::vector stmt_to_remove; + std::vector call_stmt_to_remove; + std::vector> stmt_to_cleanup; + + for (BB *bb : cfg->GetAllBBs()) { + if (bb) { + for (StmtNode &stmt : bb->GetStmtNodes()) { + std::vector stmt_reg; + // OP_regassign -> = + if (stmt.GetOpCode() == OP_regassign) { + std::vector reg_redef_check_vec; + RegassignNode *regAssign = static_cast(&stmt); + if (reg_to_stmt.count(regAssign->GetRegIdx()) == 0) { + reg_order.insert(regAssign->GetRegIdx()); + } + reg_to_stmt[regAssign->GetRegIdx()].push_back(&stmt); + } else if (stmt.GetOpCode() == OP_dassign || stmt.GetOpCode() == OP_maydassign) { + std::vector var_redef_check_vec; + DassignNode *dassign = static_cast(&stmt); + // uint32 + if (var_to_stmt.count(dassign->GetStIdx().Idx()) == 0) { + 
var_order.insert(dassign->GetStIdx().Idx()); + } + var_to_stmt[dassign->GetStIdx().Idx()].push_back(&stmt); + } + // Unsupported OPCODE: + // 1. iassignoff (, ) + // 2023-02-07: I added iassignoff as interestedMemoryAccess, the address is + // calculated by ` + offset`. Hence, the instrumented code is + // simply the same as iassign + // 2. callassigned + // 2023-02-07: I added callassigned to transform the returned variables' names + // there are dassign OpCodes inside callassigned instruction + else if (stmt.GetOpCode() == OP_callassigned) { + /* + callassigned (, ..., ) { + dassign + dassign + ... + dassign } + */ + // We currently skip it, the retVar_XXX variable should not be instrumented + } else if (stmt.GetOpCode() == OP_iassign) { + // syntax: iassign (, ) + // %addr-expr = + BaseNode *addr_expr = stmt.Opnd(0); + // addr_expr have 3 cases + // iread u64 <* <$_TY_IDX111>> 22 (regread ptr %177) + if (addr_expr->GetOpCode() == OP_iread) { + std::vector dump_reg; + recursion(addr_expr, dump_reg); + for (int32 reg_tmp : dump_reg) { + if (reg_to_stmt.count(reg_tmp) == 0) { + reg_order.insert(reg_tmp); + } + reg_to_stmt[reg_tmp].push_back(&stmt); + } + } else if (addr_expr->GetOpCode() == OP_regread) { + // regread ptr %14 + RegreadNode *regread = static_cast(addr_expr); + if (reg_to_stmt.count(regread->GetRegIdx()) == 0) { + reg_order.insert(regread->GetRegIdx()); + } + reg_to_stmt[regread->GetRegIdx()].push_back(&stmt); + } else if (addr_expr->GetOpCode() == OP_dread) { + // dread i64 %asan_shadowBase + DreadNode *dread = static_cast(addr_expr); + if (var_to_stmt.count(dread->GetStIdx().Idx()) == 0) { + var_order.insert(dread->GetStIdx().Idx()); + } + var_to_stmt[dread->GetStIdx().Idx()].push_back(&stmt); + } else if (IsCommutative(addr_expr->GetOpCode())) { + std::vector dump_reg; + recursion(addr_expr->Opnd(0), dump_reg); + for (int32 reg_tmp : dump_reg) { + if (reg_to_stmt.count(reg_tmp) == 0) { + reg_order.insert(reg_tmp); + } + 
reg_to_stmt[reg_tmp].push_back(&stmt); + } + } + } else if (stmt.GetOpCode() == OP_brtrue || stmt.GetOpCode() == OP_brfalse) { + set_check br_tmp; + dep_expansion(stmt.Opnd(0), br_tmp, reg_to_stmt, var_to_stmt, mefunc); + gen_register_dep(&stmt, br_tmp, reg_to_stmt, var_to_stmt, mefunc); + + CondGotoNode *cgotoNode = static_cast(&stmt); + StmtNode *nextStmt = stmt.GetRealNext(); + instrument_flag = false; + // if it is a user check + if (userchecks.count(&stmt) > 0) { + instrument_flag = true; + user_set_check.push_back(br_tmp); + user_set_check_ID.push_back(stmt.GetStmtID()); + } else if (nextStmt != nullptr) { + if (CallNode *testcallNode = dynamic_cast(nextStmt)) { + MIRFunction *testcalleeFunc = + GlobalTables::GetFunctionTable().GetFunctionFromPuidx(testcallNode->GetPUIdx()); + // instrument if it is a call to sanitzer + if (testcalleeFunc->GetName().find("__asan_report_") == 0) { + san_set_check.push_back(br_tmp); + san_set_check_ID.push_back(stmt.GetStmtID()); + instrument_flag = true; + } + } + } + if (instrument_flag) { + uint32 goto_id = cgotoNode->GetOffset(); + brgoto_map[goto_id].push_back(&stmt); + uint32 lb_id = (static_cast(&stmt))->GetLabelIdx(); + // save the BB id for checking + stmt_to_bbID[lb_id] = bb->UintID(); + stmt_id_to_stmt[stmt.GetStmtID()] = &stmt; + stmt_id_list.push_back(stmt.GetStmtID()); + } + } + } + } + } + if (brgoto_map.size() > 0) { + // We loop again, if + for (BB *bb : cfg->GetAllBBs()) { + if (bb) { + for (StmtNode &stmt : bb->GetStmtNodes()) { + uint32 label_index = (static_cast(&stmt))->GetLabelIdx(); + auto iter = brgoto_map.find(label_index); + // Some instruction with goto, will have the same LB id, as result + // we may double count our coverage, so, we exclude op_goto + if (iter != brgoto_map.end() && stmt.GetOpCode() != OP_goto) { + std::vector tmp = brgoto_map[label_index]; + for (auto stmt_tmp : tmp) { + uint32 tmp_label_index = (static_cast(stmt_tmp))->GetLabelIdx(); + auto id_check = 
stmt_to_bbID.find(tmp_label_index); + if (id_check == stmt_to_bbID.end()) { + bb_id = 0; + } else { + bb_id = stmt_to_bbID[tmp_label_index]; + } + stmt_id = stmt_tmp->GetStmtID(); + // We reverse the logic here + // Since brtrue, means jump if the check equal to true + // The instruction itself will need to be false in order for being executed + if (stmt_tmp->GetOpCode() == OP_brtrue) { + br_true = 0; + } else { + br_true = 1; + } + // record whether it is a usercheck or sancheck + auto search = userchecks.find(stmt_tmp); + if (search != userchecks.end()) { + type_of_check = 0; + } else { + type_of_check = 1; + } + CallNode *caller_cov = retCallCOV(mefunc, bb_id, stmt_id, br_true, type_of_check); + CallNode *callee_cov = retCallCOV(mefunc, bb_id, stmt_id, br_true ^ 1, type_of_check); + caller_cov->InsertBeforeThis(*stmt_tmp); + callee_cov->InsertBeforeThis(stmt); + stmt_to_cleanup.emplace_back(caller_cov, bb); + stmt_to_cleanup.emplace_back(callee_cov, bb); + } + } + } + } + } + } + + if (check_env == 2) { + LogInfo::MapleLogger() << "Solving Sat" + << "\n"; + // If is eliminate mode + std::string fn_UC = mefunc.GetMIRModule().GetFileName() + "_UC"; + std::string fn_SC = mefunc.GetMIRModule().GetFileName() + "_SC"; + std::map san_struct_UC = gen_dynmatch(fn_UC); + std::map san_struct_SC = gen_dynmatch(fn_SC); + std::map> SC_SC_mapping; + std::map> UC_SC_mapping; + + for (auto const &[id_UC, val_UC] : san_struct_UC) { + for (auto const &[id_SC, val_SC] : san_struct_SC) { + // For SC-UC case, SC must be var a + if (dynamic_sat(val_SC, val_UC, false)) { + if (UC_SC_mapping.count(id_SC)) { + UC_SC_mapping[id_SC].insert(id_UC); + } else { + std::set tmp_set; + tmp_set.insert(id_UC); + UC_SC_mapping[id_SC] = tmp_set; + } + if (UC_SC_mapping.count(id_UC)) { + UC_SC_mapping[id_UC].insert(id_SC); + } else { + std::set tmp_set; + tmp_set.insert(id_SC); + UC_SC_mapping[id_UC] = tmp_set; + } + } + } + } + + for (auto const &[id_SC_1, val_SC_1] : san_struct_SC) { + for (auto 
const &[id_SC_2, val_SC_2] : san_struct_SC) { + if (id_SC_1 != id_SC_2) { + if (dynamic_sat(val_SC_1, val_SC_2, false)) { + if (SC_SC_mapping.count(id_SC_1)) { + SC_SC_mapping[id_SC_1].insert(id_SC_2); + } else { + std::set tmp_set; + tmp_set.insert(id_SC_2); + SC_SC_mapping[id_SC_1] = tmp_set; + } + if (SC_SC_mapping.count(id_SC_2)) { + SC_SC_mapping[id_SC_2].insert(id_SC_1); + } else { + std::set tmp_set; + tmp_set.insert(id_SC_1); + SC_SC_mapping[id_SC_2] = tmp_set; + } + } + } + } + } + // san deletion + int SCSC_SAT_CNT = 0; + int SCSC_SAT_RUNS = 0; + for (size_t san_i = 0; san_i < san_set_check.size(); san_i++) { + for (size_t san_j = san_i + 1; san_j < san_set_check.size(); san_j++) { + SCSC_SAT_RUNS += 1; + uint32 san_i_stmt_ID = san_set_check_ID[san_i]; + uint32 san_j_stmt_ID = san_set_check_ID[san_j]; + if (SC_SC_mapping.count(san_i_stmt_ID)) { + if (SC_SC_mapping[san_i_stmt_ID].count(san_j_stmt_ID)) { + if (sat_check(san_set_check[san_i], san_set_check[san_j])) { + SCSC_SAT_CNT += 1; + StmtNode *erase_stmt; + // we just assume the larger the stmtID + // the later the stmt appears, which mostly work + if (san_i_stmt_ID > san_j_stmt_ID) { + erase_stmt = stmt_id_to_stmt[san_i_stmt_ID]; + } else { + erase_stmt = stmt_id_to_stmt[san_j_stmt_ID]; + } + if (std::count(stmt_to_remove.begin(), stmt_to_remove.end(), erase_stmt) == 0) { + stmt_to_remove.push_back(erase_stmt); + call_stmt_to_remove.push_back(erase_stmt->GetRealNext()->GetRealNext()); + } + } + } + } + } + } + int UCSC_SAT_CNT = 0; + int UCSC_SAT_RUNS = 0; + for (size_t san_i = 0; san_i < san_set_check.size(); san_i++) { + for (size_t user_j = 0; user_j < user_set_check.size(); user_j++) { + UCSC_SAT_RUNS += 1; + uint32 san_i_stmt_ID = san_set_check_ID[san_i]; + uint32 user_j_stmt_ID = user_set_check_ID[user_j]; + if (UC_SC_mapping.count(san_i_stmt_ID)) { + if (UC_SC_mapping[san_i_stmt_ID].count(user_j_stmt_ID)) { + print_dep(user_set_check[user_j]); + print_dep(san_set_check[san_i]); + bool goflag = 
false; + if (sat_check(user_set_check[user_j], san_set_check[san_i])) { + goflag = true; + } else { + san_set_check[san_i].opcode.erase( + std::remove_if(san_set_check[san_i].opcode.begin(), san_set_check[san_i].opcode.end(), isBlacklist), + san_set_check[san_i].opcode.end()); + if (sat_check(user_set_check[user_j], san_set_check[san_i])) { + goflag = true; + } + } + if (goflag) { + UCSC_SAT_CNT += 1; + StmtNode *erase_stmt = stmt_id_to_stmt[san_i_stmt_ID]; + if (std::count(stmt_to_remove.begin(), stmt_to_remove.end(), erase_stmt) == 0) { + stmt_to_remove.push_back(erase_stmt); + call_stmt_to_remove.push_back(erase_stmt->GetRealNext()->GetRealNext()); + } + } + } + } + } + } + LogInfo::MapleLogger() << "UC size: " << user_set_check.size() << "\n "; + LogInfo::MapleLogger() << "SC size: " << san_set_check.size() << "\n "; + + LogInfo::MapleLogger() << "Total UC-SC pairs: " << UCSC_SAT_RUNS << " Eliminate: " << UCSC_SAT_CNT << "\n "; + LogInfo::MapleLogger() << "Total SC-SC pairs: " << SCSC_SAT_RUNS << " Eliminate: " << SCSC_SAT_CNT << "\n "; + + LogInfo::MapleLogger() << "Removing phase: \n"; + for (BB *bb : cfg->GetAllBBs()) { + if (bb) { + for (StmtNode &stmt : bb->GetStmtNodes()) { + if (std::count(stmt_to_remove.begin(), stmt_to_remove.end(), &stmt)) { + if (CallNode *testcallNode = dynamic_cast(&stmt)) { + stmt_to_cleanup.emplace_back(&stmt, bb); + } else { + set_check br_tmp; + dep_expansion(stmt.Opnd(0), br_tmp, reg_to_stmt, var_to_stmt, mefunc); + std::set tmp_var_set; + while (!br_tmp.var_live.empty()) { + size_t var_to_check = br_tmp.var_live.top(); + tmp_var_set.insert(var_to_check); + br_tmp.var_live.pop(); + } + bool term_flag = false; + StmtNode *prevStmt = stmt.GetPrev(); + while (!term_flag && prevStmt != nullptr) { + if (prevStmt->GetOpCode() == OP_brtrue || prevStmt->GetOpCode() == OP_brfalse) { + set_check br_local_tmp; + bool trigger = false; + dep_expansion(prevStmt->Opnd(0), br_local_tmp, reg_to_stmt, var_to_stmt, mefunc); + while 
(!br_local_tmp.var_live.empty()) { + uint32 var_to_check = br_local_tmp.var_live.top(); + if (mefunc.GetMIRModule().CurFunction()->GetSymbolTabSize() >= var_to_check) { + MIRSymbol *var = mefunc.GetMIRModule().CurFunction()->GetSymbolTabItem(var_to_check); + if (var->GetName().find("asan_addr") == 0) { + trigger = true; + tmp_var_set.insert(var_to_check); + } + } + br_local_tmp.var_live.pop(); + } + // we hit a possible UC, we terminate here + if (!trigger) { + term_flag = true; + } else { + prevStmt = prevStmt->GetPrev(); + // bb->RemoveStmtNode(prevStmt->GetRealNext()); + stmt_to_cleanup.emplace_back(prevStmt->GetRealNext(), bb); + } + } else if (prevStmt->GetOpCode() == OP_dassign) { + DassignNode *dassign = static_cast(prevStmt); + // dump extra dependence + set_check br_local_tmp; + dep_expansion(prevStmt, br_local_tmp, reg_to_stmt, var_to_stmt, mefunc); + while (!br_local_tmp.var_live.empty()) { + uint32 var_to_check = br_local_tmp.var_live.top(); + if (mefunc.GetMIRModule().CurFunction()->GetSymbolTabSize() >= var_to_check) { + MIRSymbol *var = mefunc.GetMIRModule().CurFunction()->GetSymbolTabItem(var_to_check); + if (var->GetName().find("asan_addr") == 0) { + tmp_var_set.insert(var_to_check); + } + } + br_local_tmp.var_live.pop(); + } + if (tmp_var_set.count(dassign->GetStIdx().Idx())) { + prevStmt = prevStmt->GetPrev(); + stmt_to_cleanup.emplace_back(prevStmt->GetRealNext(), bb); + } else { + prevStmt = prevStmt->GetPrev(); + } + } else if (prevStmt->GetOpCode() == OP_dassignoff) { + /* + The dassignoff is not documented in the MAPLE IR + It simulate the iassignoff implementation + dassignoff () + */ + DassignoffNode *dassignoff = dynamic_cast(prevStmt); + CHECK_FATAL(dassignoff != nullptr, "Node with OP_dassignoff but not DassignoffNode"); + // TODO: I am not sure what should be done for it now ... 
+ } else if (CallNode *tmpTestCallNode = dynamic_cast(prevStmt)) { + // stop if we hit a Call + term_flag = true; + } else { + prevStmt = prevStmt->GetPrev(); + } + } + stmt_to_cleanup.emplace_back(&stmt, bb); + } + } + } + } + } + for (auto bb_pair : stmt_to_cleanup) { + bb_pair.second->RemoveStmtNode(bb_pair.first); + } + int erase_ctr = 0; + LogInfo::MapleLogger() << "Clean up redundant call stmt " + << "\n"; + BlockNode *bodyNode = mefunc.GetMirFunc()->GetBody(); + for (auto stmt : call_stmt_to_remove) { + erase_ctr += 1; + bodyNode->RemoveStmt(stmt); + } + LogInfo::MapleLogger() << "Erased: " << erase_ctr << "\n"; + } + if ((mefunc.GetName().compare("main") == 0) && (check_env == 1)) { + // Register the call, such it dump the coverage at the exit + __san_cov_flush = getOrInsertFunction(builder, "__san_cov_flush", voidType, {}); + // Insert the atexit to the starting point of the main + MapleVector args(mefunc.GetMIRModule().GetMPAllocator().Adapter()); + StmtNode *stmt_tmp = builder->CreateStmtCall(__san_cov_flush->GetPuidx(), args); + mefunc.GetMirFunc()->GetBody()->InsertFirst(stmt_tmp); + } + LogInfo::MapleLogger() << "****************SANRAZOR Done****************" + << "\n"; +} + +void AddressSanitizer::instrumentMemIntrinsic(IntrinsiccallNode *stmtNode) { + if (stmtNode == nullptr) { + return; + } + + switch (stmtNode->GetIntrinsic()) { + case INTRN_C_memset: { + MapleVector args(module->GetMPAllocator().Adapter()); + args.emplace_back(stmtNode->Opnd(0)); + args.emplace_back(stmtNode->Opnd(1)); + args.emplace_back(stmtNode->Opnd(2)); + + CallNode *registerCallNode = module->GetMIRBuilder()->CreateStmtCall(AsanMemset->GetPuidx(), args); + func->GetMirFunc()->GetBody()->ReplaceStmt1WithStmt2(stmtNode, registerCallNode); + return; + } + case INTRN_C_memmove: { + MapleVector args(module->GetMPAllocator().Adapter()); + args.emplace_back(stmtNode->Opnd(0)); + args.emplace_back(stmtNode->Opnd(1)); + args.emplace_back(stmtNode->Opnd(2)); + + CallNode 
*registerCallNode = module->GetMIRBuilder()->CreateStmtCall(AsanMemmove->GetPuidx(), args); + func->GetMirFunc()->GetBody()->ReplaceStmt1WithStmt2(stmtNode, registerCallNode); + return; + } + case INTRN_C_memcpy: { + MapleVector args(module->GetMPAllocator().Adapter()); + args.emplace_back(stmtNode->Opnd(0)); + args.emplace_back(stmtNode->Opnd(1)); + args.emplace_back(stmtNode->Opnd(2)); + + CallNode *registerCallNode = module->GetMIRBuilder()->CreateStmtCall(AsanMemcpy->GetPuidx(), args); + func->GetMirFunc()->GetBody()->ReplaceStmt1WithStmt2(stmtNode, registerCallNode); + return; + } + default: { + return; + } + } +} + +MemoryAccess AddressSanitizer::getIassignMemoryAccess(IassignNode &iassign) { + MIRType *mirType = GlobalTables::GetTypeTable().GetTypeFromTyIdx(iassign.GetTyIdx()); + MIRPtrType *pointerType = static_cast(mirType); + MIRType *pointedTy = GlobalTables::GetTypeTable().GetTypeFromTyIdx(pointerType->GetPointedTyIdx()); + size_t align = pointedTy->GetAlign(); + if (pointedTy->IsStructType()) { + MIRStructType *mirStructType = dynamic_cast(pointedTy); + if (iassign.GetFieldID() > 0) { + pointedTy = mirStructType->GetFieldType(iassign.GetFieldID()); + align = pointedTy->GetAlign(); + } else { + align = pointedTy->GetSize(); + } + } + BaseNode *addr = + module->GetMIRBuilder()->CreateExprIaddrof(PTY_u64, iassign.GetTyIdx(), iassign.GetFieldID(), iassign.Opnd(0)); + MemoryAccess memoryAccess = {&iassign, true, pointedTy->GetSize() << 3, align, addr}; + return memoryAccess; +} + +MemoryAccess AddressSanitizer::getIassignoffMemoryAccess(IassignoffNode &iassignoff) { + int32 offset = iassignoff.GetOffset(); + BaseNode *addrNode = iassignoff.GetBOpnd(0); + PrimType primType = iassignoff.GetPrimType(); + PrimType addrPrimType = addrNode->GetPrimType(); + BaseNode *addrExpr = nullptr; + size_t primTypeSize = GetPrimTypeSize(primType); + if (offset == 0) { + addrExpr = addrNode; + } else { + MIRType *mirType = 
module->CurFuncCodeMemPool()->New(MIRTypeKind::kTypePointer, addrPrimType); + MIRIntConst *offsetConstVal = module->CurFuncCodeMemPool()->New(offset, *mirType); + ConstvalNode *offsetNode = module->CurFuncCodeMemPool()->New(addrPrimType); + offsetNode->SetConstVal(offsetConstVal); + addrExpr = module->CurFuncCodeMemPool()->New(OP_add, addrPrimType, addrNode, offsetNode); + } + BaseNode *addr = module->GetMIRBuilder()->CreateExprIaddrof(addrPrimType, TyIdx(primType), 0, addrExpr); + MemoryAccess memoryAccess = {&iassignoff, true, primTypeSize << 3, primTypeSize, addr}; + return memoryAccess; +} + +MemoryAccess AddressSanitizer::getIreadMemoryAccess(IreadNode &iread, StmtNode *stmtNode) { + MIRType *mirType = GlobalTables::GetTypeTable().GetTypeFromTyIdx(iread.GetTyIdx()); + MIRPtrType *pointerType = static_cast(mirType); + MIRType *pointedTy = GlobalTables::GetTypeTable().GetTypeFromTyIdx(pointerType->GetPointedTyIdx()); + size_t align = pointedTy->GetAlign(); + if (pointedTy->IsStructType()) { + MIRStructType *mirStructType = dynamic_cast(pointedTy); + if (iread.GetFieldID() > 0) { + pointedTy = mirStructType->GetFieldType(iread.GetFieldID()); + align = pointedTy->GetAlign(); + } else { + align = pointedTy->GetSize(); + } + } + BaseNode *addr = + module->GetMIRBuilder()->CreateExprIaddrof(PTY_u64, iread.GetTyIdx(), iread.GetFieldID(), iread.Opnd(0)); + MemoryAccess memoryAccess = {stmtNode, false, pointedTy->GetSize() << 3, align, addr}; + return memoryAccess; +} + +std::vector AddressSanitizer::isInterestingMemoryAccess(StmtNode *stmtNode) { + std::vector memAccess; + if (LocalDynamicShadow == stmtNode) { + return memAccess; + } + + std::stack baseNodeStack; + baseNodeStack.push(stmtNode); + while (!baseNodeStack.empty()) { + BaseNode *baseNode = baseNodeStack.top(); + CHECK_FATAL(baseNode != nullptr, "Invalid IR node pointer."); + baseNodeStack.pop(); + switch (baseNode->GetOpCode()) { + case OP_iassign: { + IassignNode *iassign = dynamic_cast(baseNode); + 
CHECK_FATAL((iassign != nullptr), "Invalid IR node with OpCode OP_iassign"); + struct MemoryAccess memoryAccess = getIassignMemoryAccess(*iassign); + memAccess.emplace_back(memoryAccess); + // the rhs-expr can still read from somewhere, push it to stack + baseNodeStack.push(iassign->Opnd(1)); + break; + } + case OP_iassignoff: { + IassignoffNode *iassignoff = dynamic_cast(baseNode); + CHECK_FATAL((iassignoff != nullptr), "Invalid IR node with OpCode OP_iassignoff"); + struct MemoryAccess memoryAccess = getIassignoffMemoryAccess(*iassignoff); + memAccess.emplace_back(memoryAccess); + // the rhs-expr can still read from somewhere, push it to stack + baseNodeStack.push(iassignoff->GetBOpnd(1)); + break; + } + case OP_iassignfpoff: + case OP_iassignpcoff: + break; + case OP_iread: { + IreadNode *iread = nullptr; + if (baseNode->IsSSANode()) { + iread = dynamic_cast(dynamic_cast(baseNode)->GetNoSSANode()); + } else { + iread = dynamic_cast(baseNode); + } + CHECK_FATAL((iread != nullptr), "Invalid IR node with OpCode OP_iread."); + struct MemoryAccess memoryAccess = getIreadMemoryAccess(*iread, stmtNode); + memAccess.emplace_back(memoryAccess); + break; + } + case OP_ireadoff: + case OP_ireadfpoff: + case OP_ireadpcoff: + break; + default: { + } + } + for (size_t j = 0; j < baseNode->NumOpnds(); ++j) { + if (baseNode->GetOpCode() == OP_return) continue; + baseNodeStack.push(baseNode->Opnd(j)); + } + } + LogInfo::MapleLogger() << "ASAN isInterestingMemoryAccess is done.\n"; + return memAccess; +} + +void AddressSanitizer::initializeCallbacks(const MIRModule &mirModule) { + MIRBuilder *mirBuilder = mirModule.GetMIRBuilder(); + MIRType *voidType = GlobalTables::GetTypeTable().GetVoid(); + MIRType *Int32Type = GlobalTables::GetTypeTable().GetPrimType(PTY_i32); + MIRType *Int8PtrType = + GlobalTables::GetTypeTable().GetOrCreatePointerType(GlobalTables::GetTypeTable().GetInt8()->GetTypeIndex()); + +#ifdef ENABLERBTREE + AsanRBTSafetyCheck = getOrInsertFunction(mirBuilder, 
"__asan_rbt_safety_check", voidType, {IntPtrTy, Int32Type}); + AsanRBTStackInsert = getOrInsertFunction(mirBuilder, "__asan_rbt_stack_insert", voidType, {IntPtrTy, Int32Type}); + AsanRBTStackDelete = getOrInsertFunction(mirBuilder, "__asan_rbt_stack_delete", voidType, {IntPtrTy, Int32Type}); +#else + for (size_t AccessIsWrite = 0; AccessIsWrite <= 1; AccessIsWrite++) { + const std::string TypeStr = AccessIsWrite ? "store" : "load"; + AsanErrorCallbackSized[AccessIsWrite] = getOrInsertFunction( + mirBuilder, (kAsanReportErrorTemplate + TypeStr + "_n").c_str(), voidType, {IntPtrTy, IntPtrTy}); + for (size_t AccessSizeIndex = 0; AccessSizeIndex < kNumberOfAccessSizes; AccessSizeIndex++) { + const std::string Suffix = TypeStr + std::to_string(1ULL << AccessSizeIndex); + AsanErrorCallback[AccessIsWrite][AccessSizeIndex] = + getOrInsertFunction(mirBuilder, (kAsanReportErrorTemplate + Suffix).c_str(), voidType, {IntPtrTy}); + } + } +#endif + + AsanMemmove = getOrInsertFunction(mirBuilder, "__asan_memmove", Int8PtrType, {Int8PtrType, Int8PtrType, IntPtrTy}); + AsanMemcpy = getOrInsertFunction(mirBuilder, "__asan_memcpy", Int8PtrType, {Int8PtrType, Int8PtrType, IntPtrTy}); + AsanMemset = getOrInsertFunction(mirBuilder, "__asan_memset", Int8PtrType, {Int8PtrType, Int32Type, IntPtrTy}); + + AsanHandleNoReturnFunc = getOrInsertFunction(mirBuilder, kAsanHandleNoReturnName, voidType, {}); +} + +void AddressSanitizer::maybeInsertDynamicShadowAtFunctionEntry(const MeFunction &F) { + if (Mapping.Offset != kDynamicShadowSentinel) { + return; + } + MIRBuilder *mirBuilder = F.GetMIRModule().GetMIRBuilder(); + MIRSymbol *GlobalDynamicAddress = mirBuilder->GetOrCreateGlobalDecl(kAsanShadowMemoryDynamicAddress, *IntPtrTy); + DreadNode *dreadNode = mirBuilder->CreateDread(*GlobalDynamicAddress, PTY_ptr); + MIRType *Int64PtrTy = GlobalTables::GetTypeTable().GetOrCreatePointerType(IntPtrTy->GetTypeIndex()); + LocalDynamicShadow = mirBuilder->CreateExprIread(*IntPtrTy, *Int64PtrTy, 0, 
dreadNode); +} + +void AddressSanitizer::instrumentMop(StmtNode *I, std::vector &memoryAccess) { + assert(memoryAccess.size() > 0); + + size_t granularity = 1 << Mapping.Scale; + for (MemoryAccess access : memoryAccess) { + doInstrumentAddress(this, I, access.ptrOperand, access.alignment, granularity, access.typeSize, access.isWrite); + } +} + +BaseNode *AddressSanitizer::memToShadow(BaseNode *Shadow, MIRBuilder &mirBuilder) { + Shadow = mirBuilder.CreateExprBinary(OP_ashr, *GlobalTables::GetTypeTable().GetInt64(), Shadow, + mirBuilder.CreateIntConst(Mapping.Scale, IntPtrPrim)); + if (Mapping.Offset == 0) { + return Shadow; + } + BaseNode *ShadowBase; + if (LocalDynamicShadow) { + ShadowBase = LocalDynamicShadow; + } else { + ShadowBase = mirBuilder.CreateIntConst(Mapping.Offset, IntPtrPrim); + } + if (Mapping.OrShadowOffset) { + return mirBuilder.CreateExprBinary(OP_lior, *GlobalTables::GetTypeTable().GetInt64(), Shadow, ShadowBase); + } else { + return mirBuilder.CreateExprBinary(OP_add, *GlobalTables::GetTypeTable().GetInt64(), Shadow, ShadowBase); + } +} + +void AddressSanitizer::instrumentAddress(StmtNode *InsertBefore, BaseNode *Addr, uint64_t TypeSize, + bool IsWrite, BaseNode *SizeArgument) { + MIRBuilder *mirBuilder = module->GetMIRBuilder(); + +#ifdef ENABLERBTREE + auto i32PrimTy = GlobalTables::GetTypeTable().GetInt32()->GetPrimType(); + MapleVector args(mirBuilder->GetCurrentFuncCodeMpAllocator()->Adapter()); + args.emplace_back(Addr); + args.emplace_back(mirBuilder->CreateIntConst(TypeSize / 8, i32PrimTy)); + func->GetMirFunc()->GetBody()->InsertBefore(InsertBefore, + mirBuilder->CreateStmtCall(AsanRBTSafetyCheck->GetPuidx(), args)); + return; +#endif + + size_t accessSizeIndex = TypeSizeToSizeIndex(TypeSize); + MIRSymbol *addrSymbol = getOrCreateSymbol(mirBuilder, IntPtrTy->GetTypeIndex(), "asan_addr", kStVar, kScAuto, + module->CurFunction(), kScopeLocal); + DassignNode *dassignNode = mirBuilder->CreateStmtDassign(addrSymbol->GetStIdx(), 0, Addr); + 
+ func->GetMirFunc()->GetBody()->InsertBefore(InsertBefore, dassignNode); + + // Assign the address to %addr + MIRType *shadowTy = GlobalTables::GetTypeTable().GetInt8(); + MIRPtrType *shadowPtrTy = + dynamic_cast(GlobalTables::GetTypeTable().GetOrCreatePointerType(shadowTy->GetTypeIndex())); + // Get the address of shadow value + BaseNode *shadowPtr = memToShadow(mirBuilder->CreateDread(*addrSymbol, IntPtrPrim), *mirBuilder); + BaseNode *cmpVal = mirBuilder->CreateIntConst(0, shadowTy->GetPrimType()); + // Get the value of shadow memory + MIRSymbol *shadowValue = getOrCreateSymbol(mirBuilder, shadowTy->GetTypeIndex(), "asan_shadowValue", kStVar, kScAuto, + module->CurFunction(), kScopeLocal); + dassignNode = mirBuilder->CreateStmtDassign(shadowValue->GetStIdx(), 0, + mirBuilder->CreateExprIread(*shadowTy, *shadowPtrTy, 0, shadowPtr)); + dassignNode->InsertAfterThis(*InsertBefore); + // Check if value != 0 + BinaryNode *cmp = mirBuilder->CreateExprBinary( + OP_ne, *shadowTy, mirBuilder->CreateDread(*shadowValue, shadowTy->GetPrimType()), cmpVal); + size_t granularity = 1ULL << Mapping.Scale; + + StmtNode *crashBlock; + if (TypeSize < 8 * granularity) { + StmtNode *checkBlock = splitIfAndElseBlock(OP_brfalse, InsertBefore, cmp); + BinaryNode *cmp2 = createSlowPathCmp(checkBlock, mirBuilder->CreateDread(*addrSymbol, PTY_i64), + mirBuilder->CreateDread(*shadowValue, shadowTy->GetPrimType()), TypeSize); + + crashBlock = splitIfAndElseBlock(OP_brfalse, InsertBefore->GetPrev(), cmp2); + + } else { + crashBlock = splitIfAndElseBlock(OP_brfalse, InsertBefore, cmp); + } + CallNode *crash = generateCrashCode(addrSymbol, IsWrite, accessSizeIndex, SizeArgument); + crash->InsertBeforeThis(*crashBlock); +} + +void AddressSanitizer::instrumentUnusualSizeOrAlignment(StmtNode *InsertBefore, BaseNode *Addr, + uint64_t TypeSize, bool IsWrite) { + MIRBuilder *mirBuilder = module->GetMIRBuilder(); + BaseNode *size = mirBuilder->CreateIntConst(TypeSize / 8, IntPtrPrim); + MIRSymbol 
*addrSymbol = getOrCreateSymbol(mirBuilder, IntPtrTy->GetTypeIndex(), "asan_addr", kStVar, kScAuto, + module->CurFunction(), kScopeLocal); + DassignNode *dassignNode = mirBuilder->CreateStmtDassign(addrSymbol->GetStIdx(), 0, Addr); + dassignNode->InsertAfterThis(*InsertBefore); + BinaryNode *binaryNode = + mirBuilder->CreateExprBinary(OP_add, *IntPtrTy, mirBuilder->CreateDread(*addrSymbol, IntPtrPrim), + mirBuilder->CreateIntConst(TypeSize / 8 - 1, IntPtrPrim)); + MIRSymbol *lastByteSymbol = getOrCreateSymbol(mirBuilder, IntPtrTy->GetTypeIndex(), "asan_lastByte", kStVar, kScAuto, + module->CurFunction(), kScopeLocal); + DassignNode *lastByte = mirBuilder->CreateStmtDassign(lastByteSymbol->GetStIdx(), 0, binaryNode); + lastByte->InsertAfterThis(*InsertBefore); + instrumentAddress(InsertBefore, Addr, 8, IsWrite, size); + instrumentAddress(InsertBefore, mirBuilder->CreateDread(*lastByteSymbol, PTY_ptr), 8, IsWrite, size); +} + +BinaryNode *AddressSanitizer::createSlowPathCmp(StmtNode *InsBefore, BaseNode *AddrLong, BaseNode *ShadowValue, + uint64_t TypeSize) { + MIRBuilder *mirBuilder = module->GetMIRBuilder(); + size_t granularity = static_cast(1) << Mapping.Scale; + // Addr & (Granularity - 1) + BinaryNode *lastAccessedByte = mirBuilder->CreateExprBinary(OP_band, *IntPtrTy, AddrLong, + mirBuilder->CreateIntConst(granularity - 1, IntPtrPrim)); + // (Addr & (Granularity - 1)) + size - 1 + if (TypeSize / 8 > 1) { + lastAccessedByte = mirBuilder->CreateExprBinary(OP_add, *IntPtrTy, lastAccessedByte, + mirBuilder->CreateIntConst(TypeSize / 8 - 1, IntPtrPrim)); + } + // (uint8_t) ((Addr & (Granularity-1)) + size - 1) + MIRSymbol *length = getOrCreateSymbol(mirBuilder, TyIdx(ShadowValue->GetPrimType()), "asan_length", kStVar, kScAuto, + module->CurFunction(), kScopeLocal); + DassignNode *dassignNode = mirBuilder->CreateStmtDassign(length->GetStIdx(), 0, lastAccessedByte); + dassignNode->InsertBeforeThis(*InsBefore); + // ((uint8_t) ((Addr & (Granularity-1)) + size - 1)) >= 
ShadowValue + return mirBuilder->CreateExprBinary(OP_ge, *GlobalTables::GetTypeTable().GetTypeFromTyIdx(ShadowValue->GetPrimType()), + mirBuilder->CreateDread(*length, ShadowValue->GetPrimType()), ShadowValue); +} + +CallNode *AddressSanitizer::generateCrashCode(MIRSymbol *Addr, bool IsWrite, size_t AccessSizeIndex, + BaseNode *SizeArgument) { + CallNode *callNode = nullptr; + MIRBuilder *mirBuilder = module->GetMIRBuilder(); + MapleVector args(mirBuilder->GetCurrentFuncCodeMpAllocator()->Adapter()); + args.emplace_back(mirBuilder->CreateDread(*Addr, IntPtrPrim)); + if (SizeArgument) { + args.emplace_back(SizeArgument); + callNode = mirBuilder->CreateStmtCall(AsanErrorCallbackSized[IsWrite]->GetPuidx(), args); + } else { + callNode = mirBuilder->CreateStmtCall(AsanErrorCallback[IsWrite][AccessSizeIndex]->GetPuidx(), args); + } + return callNode; +} + +StmtNode *AddressSanitizer::splitIfAndElseBlock(Opcode op, StmtNode *elsePart, const BinaryNode *cmpStmt) { + MIRBuilder *mirBuilder = module->GetMIRBuilder(); + auto *cmpNode = mirBuilder->CreateExprCompare( + cmpStmt->GetOpCode(), *GlobalTables::GetTypeTable().GetTypeFromTyIdx(TyIdx(cmpStmt->GetPrimType())), + *GlobalTables::GetTypeTable().GetTypeFromTyIdx(TyIdx(cmpStmt->GetPrimType())), cmpStmt->Opnd(0), + cmpStmt->Opnd(1)); + LabelIdx labelIdx = module->CurFunction()->GetLabelTab()->CreateLabel(); + module->CurFunction()->GetLabelTab()->AddToStringLabelMap(labelIdx); + CondGotoNode *brStmt = mirBuilder->CreateStmtCondGoto(cmpNode, op, labelIdx); + brStmt->InsertAfterThis(*elsePart); + + brStmt->SetOffset(labelIdx); + LabelNode *labelStmt = module->CurFuncCodeMemPool()->New(); + labelStmt->SetLabelIdx(labelIdx); + labelStmt->InsertAfterThis(*elsePart); + return brStmt; +} + +bool AddressSanitizer::isInterestingSymbol(const MIRSymbol &symbol) { + if (StringUtils::StartsWith(symbol.GetName(), "asan_")) { + return false; + } + if (StringUtils::StartsWith(symbol.GetName(), "retVar_")) { + /* some variables have names 
with `retVar_` + they are inserted by compiler to capture the returned value + of a function. We skip these variables since they are + inserted by compiler. + Currently, there is no flag to identify whether a variable is + created by users or ArkCC, we have to use this naive approach. + */ + return false; + } + if (StringUtils::StartsWith(symbol.GetName(), "_temp_.shortcircuit.")) { + return false; + } + if (ProcessedSymbols.find(&symbol) != ProcessedSymbols.end()) { + return ProcessedSymbols[&symbol]; + } + if (std::find(preAnalysis->usedInAddrof.begin(), preAnalysis->usedInAddrof.end(), &symbol) != + preAnalysis->usedInAddrof.end()) { + return true; + } + bool isInteresting = true; + + MIRType *mirType = symbol.GetType(); + isInteresting = isInteresting && isTypeSized(mirType); + isInteresting = isInteresting && mirType->GetSize() > 0; + isInteresting = isInteresting && !symbol.IsConst(); + + if (mirType->GetKind() == kTypeScalar || mirType->GetKind() == kTypePointer || mirType->GetKind() == kTypeBitField) { + isInteresting = false; + } + + ProcessedSymbols[&symbol] = isInteresting; + return isInteresting; +} + +bool AddressSanitizer::isInterestingAlloca(const UnaryNode &unaryNode) { + if (ProcessedAllocas.find(&unaryNode) != ProcessedAllocas.end()) { + return ProcessedAllocas[&unaryNode]; + } + bool isInteresting = true; + + const ConstvalNode *constvalNode = dynamic_cast(unaryNode.Opnd(0)); + if (constvalNode) { + const MIRIntConst *mirConst = dynamic_cast(constvalNode->GetConstVal()); + isInteresting = mirConst->GetValue().GetExtValue() > 0; + } + ProcessedAllocas[&unaryNode] = isInteresting; + return isInteresting; +} + +} // namespace maple + +#endif \ No newline at end of file diff --git a/src/mapleall/maple_san/src/asan_mapping.cpp b/src/mapleall/maple_san/src/asan_mapping.cpp new file mode 100644 index 0000000000000000000000000000000000000000..a0ad1f118a04efaf62c741c63afde2b2f48612e4 --- /dev/null +++ b/src/mapleall/maple_san/src/asan_mapping.cpp @@ -0,0 
+1,128 @@ +#ifdef ENABLE_MAPLE_SAN + +#include "asan_mapping.h" + +#include +#include + +#include "san_common.h" + +namespace maple { + +inline uint64_t alignTo(uint64_t Value, uint64_t Align, uint64_t Skew = 0) { + assert(Align != 0u && "Align can't be 0."); + Skew %= Align; + return (((((Value + Align) - 1) - Skew) / Align) * Align) + Skew; +} + +static inline bool CompareVars(const ASanStackVariableDescription &a, const ASanStackVariableDescription &b) { + return a.Alignment > b.Alignment; +} + +static size_t VarAndRedzoneSize(size_t Size, size_t Granularity, size_t Alignment) { + size_t Res = 0; + if (Size <= 4) { + Res = 16; + } else if (Size <= 16) { + Res = 32; + } else if (Size <= 128) { + Res = Size + 32; + } else if (Size <= 512) { + Res = Size + 64; + } else if (Size <= 4096) { + Res = Size + 128; + } else { + Res = Size + 256; + } + return alignTo(std::max(Res, 2 * Granularity), Alignment); +} + +std::string ComputeASanStackFrameDescription(const std::vector &vars) { + std::stringstream stackDescription; + stackDescription << vars.size(); + + for (const auto &var : vars) { + std::string name = var.Name; + stackDescription << " " << var.Offset << " " << var.Size << " " << name.size() << " " << name; + } + return stackDescription.str(); +} + +std::vector GetShadowBytes(const std::vector &Vars, + const ASanStackFrameLayout &Layout) { + assert(Vars.size() > 0); + std::vector vector; + vector.clear(); + const size_t granularity = Layout.Granularity; + vector.resize(Vars[0].Offset / granularity, kAsanStackLeftRedzoneMagic); + for (const auto &var : Vars) { + vector.resize(var.Offset / granularity, kAsanStackMidRedzoneMagic); + + vector.resize(vector.size() + var.Size / granularity, 0); + if (var.Size % granularity) { + vector.push_back(var.Size % granularity); + } + } + vector.resize(Layout.FrameSize / granularity, kAsanStackRightRedzoneMagic); + return vector; +} + +std::vector GetShadowBytesAfterScope(const std::vector &Vars, + const ASanStackFrameLayout 
&Layout) { + std::vector SB = GetShadowBytes(Vars, Layout); + const size_t Granularity = Layout.Granularity; + + for (const auto &Var : Vars) { + assert(Var.LifetimeSize <= Var.Size); + const size_t LifetimeShadowSize = (Var.LifetimeSize + Granularity - 1) / Granularity; + const size_t Offset = Var.Offset / Granularity; + auto Last = SB.begin() + Offset + LifetimeShadowSize; + for (auto Start = SB.begin() + Offset; Start != Last; ++Start) { + *Start = kAsanStackUseAfterScopeMagic; + } + } + + return SB; +} + +ASanStackFrameLayout ComputeASanStackFrameLayout(std::vector &Vars, size_t Granularity, + size_t MinHeaderSize) { + assert(Granularity >= 8 && Granularity <= 64 && (Granularity & (Granularity - 1)) == 0); + assert(MinHeaderSize >= 16 && (MinHeaderSize & (MinHeaderSize - 1)) == 0 && MinHeaderSize >= Granularity); + const size_t NumVars = Vars.size(); + assert(NumVars > 0); + for (size_t i = 0; i < NumVars; i++) { + Vars[i].Alignment = std::max(Vars[i].Alignment, kMinAlignment); + } + + std::sort(Vars.begin(), Vars.end(), CompareVars); + + ASanStackFrameLayout Layout; + Layout.Granularity = Granularity; + Layout.FrameAlignment = std::max(Granularity, Vars[0].Alignment); + size_t Offset = std::max(std::max(MinHeaderSize, Granularity), Vars[0].Alignment); + CHECK_FATAL((Offset % Granularity) == 0, "Offset cannot be divided by size of each item."); + for (size_t i = 0; i < NumVars; i++) { + bool IsLast = i == NumVars - 1; + size_t Alignment = std::max(Granularity, Vars[i].Alignment); + (void)Alignment; // Used only in asserts. + size_t Size = Vars[i].Size; + CHECK_FATAL((Alignment & (Alignment - 1)) == 0, "`Alignment = 1` is not supported"); + CHECK_FATAL(Layout.FrameAlignment >= Alignment, "Stack frame alignment is smaller than the alignment"); + CHECK_FATAL((Offset % Alignment) == 0, "Offset cannot be divided by alignment"); + CHECK_FATAL(Size > 0, "We get variable with 0 byte"); + size_t NextAlignment = IsLast ? 
Granularity : std::max(Granularity, Vars[i + 1].Alignment); + size_t SizeWithRedzone = VarAndRedzoneSize(Size, Granularity, NextAlignment); + Vars[i].Offset = Offset; + Offset += SizeWithRedzone; + } + if (Offset % MinHeaderSize) { + Offset += MinHeaderSize - (Offset % MinHeaderSize); + } + Layout.FrameSize = Offset; + assert((Layout.FrameSize % MinHeaderSize) == 0); + return Layout; +} +} // namespace maple + +#endif \ No newline at end of file diff --git a/src/mapleall/maple_san/src/asan_module.cpp b/src/mapleall/maple_san/src/asan_module.cpp new file mode 100644 index 0000000000000000000000000000000000000000..0e944d9c339c03a632e0bd513b55542c50bb00a1 --- /dev/null +++ b/src/mapleall/maple_san/src/asan_module.cpp @@ -0,0 +1,399 @@ +// +// Created by wchenbt on 4/4/2021. +// +#ifdef ENABLE_MAPLE_SAN + +#include "asan_module.h" +#include "mir_builder.h" +#include "asan_interfaces.h" + +namespace maple { + void ModuleAddressSanitizer::initializeCallbacks() { + MIRBuilder *mirBuilder = module->GetMIRBuilder(); + + ArgVector args(module->GetMPAllocator().Adapter()); + MIRFunction *init_func = mirBuilder->CreateFunction("__cxx_global_var_init", + *GlobalTables::GetTypeTable().GetVoid(), + args, false, true); + MIRFunction *fini_func = mirBuilder->CreateFunction("__cxx_global_var_fini", + *GlobalTables::GetTypeTable().GetVoid(), + args, false, true); + init_func->SetAttr(FUNCATTR_local); + fini_func->SetAttr(FUNCATTR_local); + + module->AddFunction(init_func); + module->AddFunction(fini_func); + MIRType *retType = GlobalTables::GetTypeTable().GetVoid(); + + // Declare functions that register/unregister globals. 
+ AsanRegisterGlobals = getOrInsertFunction( + mirBuilder, kAsanRegisterGlobalsName, retType, {IntPtrTy, IntPtrTy}); + AsanUnregisterGlobals = getOrInsertFunction( + mirBuilder, kAsanUnregisterGlobalsName, retType, {IntPtrTy, IntPtrTy}); + + } + + bool ModuleAddressSanitizer::instrumentModule() { + initializeCallbacks(); + MapleVector args(module->GetMIRBuilder()->GetCurrentFuncCodeMpAllocator()->Adapter()); + BlockNode *ctorToBeInserted = CreateCtorAndInitFunctions(kAsanModuleCtorName, kAsanInitName, args); + + InstrumentGlobals(ctorToBeInserted); + + appendToGlobalCtors(*module, AsanCtorFunction); + if (AsanDtorFunction) { + appendToGlobalDtors(*module, AsanDtorFunction); + } + module->SetSomeSymbolNeedForDecl(false); + return true; + } + + bool ModuleAddressSanitizer::InstrumentGlobals(BlockNode *ctorToBeInserted) { + std::vector globalsToChange; + for (MIRSymbol *global : GetGlobalVaribles(*module)) { + if (ShouldInstrumentGlobal(global)) { + globalsToChange.push_back(global); + } + } + + size_t n = globalsToChange.size(); + if (n == 0) { + return false; + } + FieldVector fieldVector; + FieldVector parentFileds; + std::vector newGlobals(n); + std::vector initializers(n); + + // We initialize an array of such structures and pass it to a run-time call. 
+ GlobalTables::GetTypeTable().PushIntoFieldVector( + fieldVector, "beg", *IntPtrTy); + GlobalTables::GetTypeTable().PushIntoFieldVector( + fieldVector, "size", *IntPtrTy); + GlobalTables::GetTypeTable().PushIntoFieldVector( + fieldVector, "size_with_redzone", *IntPtrTy); + GlobalTables::GetTypeTable().PushIntoFieldVector( + fieldVector, "name", *IntPtrTy); + GlobalTables::GetTypeTable().PushIntoFieldVector( + fieldVector, "module_name", *IntPtrTy); + GlobalTables::GetTypeTable().PushIntoFieldVector( + fieldVector, "has_dynamic_init", *IntPtrTy); + GlobalTables::GetTypeTable().PushIntoFieldVector( + fieldVector, "source_location", *IntPtrTy); + GlobalTables::GetTypeTable().PushIntoFieldVector( + fieldVector, "odr_indicator", *IntPtrTy); + // Create new type for global with redzones + MIRStructType *globalStructForInitTy = static_cast( + GlobalTables::GetTypeTable().GetOrCreateStructType( + "GlobalStruct", fieldVector, parentFileds, *module)); + + for (size_t i = 0; i < n; i++) { + static const uint64_t kMaxGlobalRedzone = 1 << 18; + MIRSymbol *global = globalsToChange[i]; + // Compute the size of redzone + size_t sizeInBytes = global->GetType()->GetSize(); + size_t minRedZone = MinRedzoneSizeForGlobal(); + size_t redzone = std::max(minRedZone, + std::min(kMaxGlobalRedzone, ((sizeInBytes / minRedZone) / 4) * minRedZone)); + size_t rightRedzoneSize = redzone; + if (sizeInBytes % minRedZone) { + rightRedzoneSize += minRedZone - (sizeInBytes % minRedZone); + } + ASSERT(((rightRedzoneSize + sizeInBytes) % minRedZone) == 0, + "rightRedzoneSize + sizeInBytes cannot be divided by minRedZone"); + + // Create new type for global with redzones + fieldVector.clear(); + parentFileds.clear(); + CHECK_FATAL(rightRedzoneSize < UINT32_MAX, "Too large redzone size."); + MIRArrayType *rightRedZoneTy = GlobalTables::GetTypeTable().GetOrCreateArrayType( + *GlobalTables::GetTypeTable().GetInt8(), uint32_t(rightRedzoneSize)); + GlobalTables::GetTypeTable().PushIntoFieldVector( + 
fieldVector, "orig", *global->GetType()); + GlobalTables::GetTypeTable().PushIntoFieldVector( + fieldVector, "redzone", *rightRedZoneTy); + MIRStructType *newGlobalType = static_cast( + GlobalTables::GetTypeTable().GetOrCreateStructType( + "NewGlobal_" + global->GetName(), fieldVector, parentFileds, *module)); + + // Create new variable for global with redzones + MIRSymbol *newGlobalVar = module->GetMIRBuilder()->CreateSymbol( + newGlobalType->GetTypeIndex(), "", global->GetSKind(), + global->GetStorageClass(), nullptr, kScopeGlobal); + + // Initialize the new global + MIRAggConst *newGlobalConst = module->GetMemPool()-> + New(*module, *newGlobalVar->GetType()); + // Initialize the field orig + MIRConst *globalConst = global->GetKonst(); + MIRConst *globalConstClone; + if (globalConst->GetKind() == kConstInt) { + globalConstClone = GlobalTables::GetIntConstTable().GetOrCreateIntConst( + static_cast(globalConst)->GetValue(), globalConst->GetType()); + } else { + globalConstClone = globalConst->Clone(*module->GetMemPool()); + } + newGlobalConst->AddItem(globalConstClone, 1); + // Initialize the field redzone + MIRAggConst *arrayConst = module->GetMemPool()->New(*module, *rightRedZoneTy); + for (size_t j = 0; j < rightRedzoneSize; j++) { + arrayConst->AddItem(GlobalTables::GetIntConstTable().GetOrCreateIntConst( + 0, *GlobalTables::GetTypeTable().GetInt8()), 0); + } + + newGlobalConst->AddItem(arrayConst, 2); + // Set the initialized value to + newGlobalVar->SetKonst(newGlobalConst); + // Make the new created one the same as the old global variable + newGlobalVar->SetAttrs(global->GetAttrs()); + newGlobalVar->SetNameStrIdx(global->GetName()); + // Set source location + newGlobalVar->SetSrcPosition(global->GetSrcPosition()); + + // replace global variable field Id + for (MIRSymbol *mirSymbol: symbolUsedInInit[newGlobalVar->GetName()]) { + MIRAddrofConst *mirAddrofConst = dynamic_cast(mirSymbol->GetKonst()); + MIRAddrofConst *newAddrofConst = module->GetMemPool()->New( 
+ mirAddrofConst->GetSymbolIndex(), 1, mirAddrofConst->GetType()); + mirSymbol->SetKonst(newAddrofConst); + } + // replace statement field Id + for (BaseNode *stmtNode: symbolUsedInStmt[newGlobalVar->GetName()]) { + switch (stmtNode->GetOpCode()) { + case OP_dassign: { + DassignNode *dassignNode = dynamic_cast(stmtNode); + dassignNode->SetStIdx(newGlobalVar->GetStIdx()); + dassignNode->SetFieldID(1 + dassignNode->GetFieldID()); + break; + } + case OP_dread: + case OP_addrof: { + AddrofNode *addrofNode = dynamic_cast(stmtNode); + addrofNode->SetStIdx(newGlobalVar->GetStIdx()); + addrofNode->SetFieldID(1 + addrofNode->GetFieldID()); + break; + } + case OP_callassigned: { + CallNode *callNode = dynamic_cast(stmtNode); + CallReturnVector &callRet = callNode->GetReturnVec(); + for (size_t j = 0; j < callRet.size(); j++) { + StIdx idx = callRet[j].first; + RegFieldPair regFieldPair = callRet[j].second; + if (!regFieldPair.IsReg()) { + if (idx == global->GetStIdx()) { + callRet[j].first = newGlobalVar->GetStIdx(); + callRet[j].second.SetFieldID(1 + callRet[j].second.GetFieldID()); + } + } + } + break; + } + default: { + } + } + } + global->SetIsDeleted(); + newGlobalVar->ResetIsDeleted(); + // Create a new variable and construct its initial value + MIRAggConst *initializer = module->GetMemPool()->New(*module, *globalStructForInitTy); + + // begin + MIRAddrofConst *beginConst = createAddrofConst(*module, newGlobalVar, IntPtrPrim); + initializer->AddItem(beginConst, 1); + // size + MIRIntConst *sizeInBytesConst = GlobalTables::GetIntConstTable(). + GetOrCreateIntConst(sizeInBytes, *IntPtrTy); + initializer->AddItem(sizeInBytesConst, 2); + // size with redzone + MIRIntConst *sizeWithRedzoneConst = GlobalTables::GetIntConstTable(). 
+ GetOrCreateIntConst(sizeInBytes + rightRedzoneSize, *IntPtrTy); + initializer->AddItem(sizeWithRedzoneConst, 3); + // variable name + MIRStrConst *nameConst = createStringConst(*module, newGlobalVar->GetName(), PTY_a64); + initializer->AddItem(nameConst, 4); + // module name + MIRStrConst *moduleNameConst = createStringConst(*module, module->GetFileName(), PTY_a64); + initializer->AddItem(moduleNameConst, 5); + // isDynInit + MIRIntConst *isDynInit = GlobalTables::GetIntConstTable().GetOrCreateIntConst(0, *IntPtrTy); + initializer->AddItem(isDynInit, 6); + // Set source location + MIRConst *sourceLocConst = createSourceLocConst(*module, newGlobalVar, IntPtrPrim); + initializer->AddItem(sourceLocConst, 7); + // Set OdrIndicator + MIRConst *odrIndicator = GlobalTables::GetIntConstTable().GetOrCreateIntConst(0, *IntPtrTy); + initializer->AddItem(odrIndicator, 8); + // Set the value of initializer + LogInfo::MapleLogger() << "NEW GLOBAL: " << newGlobalVar->GetName() << "\n"; + newGlobals[i] = newGlobalVar; + initializers[i] = initializer; + } + InstrumentGlobalsWithMetadataArray(ctorToBeInserted, newGlobals, initializers); + return false; + } + + bool ModuleAddressSanitizer::ShouldInstrumentGlobal(MIRSymbol *var) { + MIRType *type = GlobalTables::GetTypeTable().GetTypeFromTyIdx(var->GetTyIdx()); + if (type == nullptr) { + return false; + } + if (!isTypeSized(type)) { + return false; + } + if (var->GetValue().konst == nullptr) { + return false; + } + if (type->GetAlign() > MinRedzoneSizeForGlobal()) { + return false; + } + return true; + } + + void ModuleAddressSanitizer::InstrumentGlobalsWithMetadataArray( + BlockNode *ctorToBeInserted, + const std::vector ExtendedGlobals, + std::vector MetadataInitializers) { + assert(ExtendedGlobals.size() == MetadataInitializers.size()); + size_t N = ExtendedGlobals.size(); + CHECK_FATAL(N > 0, "Zero size extended globals."); + MIRArrayType *arrayOfGlobalStructTy = GlobalTables::GetTypeTable().GetOrCreateArrayType( + 
MetadataInitializers[0]->GetType(), N); + MIRAggConst *allGlobalsConst = module->GetMemPool()->New(*module, *arrayOfGlobalStructTy); + for (MIRConst *meta: MetadataInitializers) { + allGlobalsConst->PushBack(meta); + } + + MIRSymbol *allGlobalsVar = module->GetMIRBuilder()->CreateSymbol( + arrayOfGlobalStructTy->GetTypeIndex(), "allGlobals", kStConst, kScFstatic, nullptr, kScopeGlobal); + allGlobalsVar->SetKonst(allGlobalsConst); + MapleVector registerGlobal(module->GetMPAllocator().Adapter()); + AddrofNode *addrofNode = module->GetMIRBuilder()->CreateAddrof(*allGlobalsVar, IntPtrPrim); + ConstvalNode *constvalNode = module->GetMIRBuilder()->CreateIntConst(N, IntPtrPrim); + registerGlobal.emplace_back(addrofNode); + registerGlobal.emplace_back(constvalNode); + CallNode *registerCallNode = module->GetMIRBuilder()->CreateStmtCall( + AsanRegisterGlobals->GetPuidx(), registerGlobal); + ctorToBeInserted->InsertBefore(ctorToBeInserted->GetLast(), registerCallNode); + BlockNode *dtorTobeInserted = CreateModuleDtor(); + // We also need to unregister globals at the end, e.g., when a shared library + // gets closed. 
+ CallNode *unRegisterCallNode = module->GetMIRBuilder()->CreateStmtCall( + AsanUnregisterGlobals->GetPuidx(), registerGlobal); + dtorTobeInserted->InsertBefore(dtorTobeInserted->GetLast(), unRegisterCallNode); + } + + BlockNode *ModuleAddressSanitizer::CreateCtorAndInitFunctions( + const std::string CtorName, const std::string InitName, const MapleVector InitArgs) { + MIRBuilder *mirBuilder = module->GetMIRBuilder(); + ArgVector args(module->GetMPAllocator().Adapter()); + AsanCtorFunction = mirBuilder->CreateFunction(CtorName, *GlobalTables::GetTypeTable().GetVoid(), args); + module->AddFunction(AsanCtorFunction); + AsanCtorFunction->SetAttr(FUNCATTR_local); + BlockNode *asanCtorBlock = AsanCtorFunction->GetBody(); + StmtNode *retNode = mirBuilder->CreateStmtReturn(nullptr); + asanCtorBlock->AddStatement(retNode); + + MIRFunction *initFunction = getOrInsertFunction(mirBuilder, InitName.c_str(), + GlobalTables::GetTypeTable().GetVoid(), {}); + CallNode *callInitNode = mirBuilder->CreateStmtCall(initFunction->GetPuidx(), InitArgs); + + asanCtorBlock->InsertBefore(retNode, callInitNode); + return asanCtorBlock; + } + + BlockNode *ModuleAddressSanitizer::CreateModuleDtor() { + MIRBuilder *mirBuilder = module->GetMIRBuilder(); + ArgVector args(module->GetMPAllocator().Adapter()); + AsanDtorFunction = mirBuilder->CreateFunction(kAsanModuleDtorName, + *GlobalTables::GetTypeTable().GetVoid(), args); + module->AddFunction(AsanDtorFunction); + AsanDtorFunction->SetAttr(FUNCATTR_local); + BlockNode *asanDtorBlock = AsanDtorFunction->GetBody(); + StmtNode *retNode = mirBuilder->CreateStmtReturn(nullptr); + asanDtorBlock->AddStatement(retNode); + + return asanDtorBlock; + } + + void ModuleAddressSanitizer::GetGlobalSymbolUsage() { + // Replace all old global users with new global + for (MIRFunction *func : module->GetFunctionList()) { + if (func == nullptr || func->GetBody() == nullptr) { + continue; + } + std::stack baseNodeStack; + StmtNodes &stmtNodes = 
func->GetBody()->GetStmtNodes(); + for (StmtNode &stmt : stmtNodes) { + baseNodeStack.push(&stmt); + } + + while (!baseNodeStack.empty()) { + BaseNode *baseNode = baseNodeStack.top(); + baseNodeStack.pop(); + switch (baseNode->GetOpCode()) { + case OP_dassign: { + DassignNode *dassignNode = dynamic_cast(baseNode); + MIRSymbol *mirSymbol = func->GetLocalOrGlobalSymbol(dassignNode->GetStIdx()); + if (mirSymbol->IsGlobal()) { + if (symbolUsedInStmt.count(mirSymbol->GetName()) == 0) { + symbolUsedInStmt[mirSymbol->GetName()] = {}; + } + symbolUsedInStmt[mirSymbol->GetName()].insert(dassignNode); + } + break; + } + case OP_dread: + case OP_addrof: { + AddrofNode *addrofNode = dynamic_cast(baseNode); + MIRSymbol *mirSymbol = func->GetLocalOrGlobalSymbol(addrofNode->GetStIdx()); + if (mirSymbol->IsGlobal()) { + if (symbolUsedInStmt.count(mirSymbol->GetName()) == 0) { + symbolUsedInStmt[mirSymbol->GetName()] = {}; + } + symbolUsedInStmt[mirSymbol->GetName()].insert(addrofNode); + } + break; + } + case OP_callassigned: { + CallNode *callNode = dynamic_cast(baseNode); + CallReturnVector &callRet = callNode->GetReturnVec(); + for (size_t i = 0; i < callRet.size(); i++) { + StIdx idx = callRet[i].first; + RegFieldPair regFieldPair = callRet[i].second; + if (!regFieldPair.IsReg()) { + MIRSymbol *mirSymbol = func->GetLocalOrGlobalSymbol(idx); + if (mirSymbol->IsGlobal()) { + if (symbolUsedInStmt.count(mirSymbol->GetName()) == 0) { + symbolUsedInStmt[mirSymbol->GetName()] = {}; + } + symbolUsedInStmt[mirSymbol->GetName()].insert(callNode); + } + } + } + break; + } + default: + break; + } + for (size_t j = 0; j < baseNode->NumOpnds(); j++) { + baseNodeStack.push(baseNode->Opnd(j)); + } + } + } + for (MIRSymbol *mirSymbol: GetGlobalVaribles(*module)) { + if (mirSymbol->GetKonst()) { + MIRConst *mirConst = mirSymbol->GetKonst(); + if (mirConst->GetKind() == kConstAddrof) { + MIRAddrofConst *mirAddrofConst = dynamic_cast(mirConst); + MIRSymbol *mirSymbolUsed = 
GlobalTables::GetGsymTable().GetSymbolFromStidx(mirAddrofConst->GetSymbolIndex().Idx()); + if (symbolUsedInInit.count(mirSymbolUsed->GetName()) == 0) { + symbolUsedInInit[mirSymbolUsed->GetName()] = {}; + } + symbolUsedInInit[mirSymbolUsed->GetName()].insert(mirSymbol); + } + } + } + } +} + +#endif \ No newline at end of file diff --git a/src/mapleall/maple_san/src/asan_phases.cpp b/src/mapleall/maple_san/src/asan_phases.cpp new file mode 100644 index 0000000000000000000000000000000000000000..cccfde11602448cebb40a698382bd7c34ec4ce4f --- /dev/null +++ b/src/mapleall/maple_san/src/asan_phases.cpp @@ -0,0 +1,90 @@ +#ifdef ENABLE_MAPLE_SAN + +#include "asan_phases.h" +#include "asan_function.h" +#include "asan_module.h" +#include "me_cfg.h" +#include "mempool.h" + + +namespace maple { + +void MEDoAsan::GetAnalysisDependence(maple::AnalysisDep &aDep) const { + aDep.AddRequired(); + aDep.SetPreservedAll(); +} + +bool MEDoAsan::PhaseRun(maple::MeFunction &f) { + // The reture value is said to show whether this phase modifies IR + // The document said the return value is not used + PreAnalysis *symbol_interesting = GET_ANALYSIS(MEDoVarCheck, f); + if (symbol_interesting == nullptr) { + LogInfo::MapleLogger() << "The MEDoVarCheck::PhaseRun is not called " << f.GetName() << "\n"; + } + LogInfo::MapleLogger() << "The MEDoAsan::PhaseRun is running " << f.GetName() << "\n"; + AddressSanitizer Asan(f.GetMIRModule(), symbol_interesting); + Asan.instrumentFunction(f); + return true; +} + +void MEDoVarCheck::GetAnalysisDependence(maple::AnalysisDep &aDep) const { + aDep.SetPreservedAll(); +} + +bool MEDoVarCheck::PhaseRun(maple::MeFunction &f) { + LogInfo::MapleLogger() << "The MEDoVarCheck::PhaseRun is running " << f.GetName() << "\n"; + MemPool *memPool = GetPhaseMemPool(); + PreAnalysis *preAnalysis = memPool->New(*memPool); + + MIRFunction *mirFunction = f.GetMirFunc(); + std::set addrOfSymList; + + std::stack baseNodeStack; + for (StmtNode &stmt : 
mirFunction->GetBody()->GetStmtNodes()) { + baseNodeStack.push(&stmt); + } + + while (!baseNodeStack.empty()) { + BaseNode *baseNode = baseNodeStack.top(); + baseNodeStack.pop(); + if (baseNode->GetOpCode() == OP_addrof) { + AddrofNode *addrofNode = dynamic_cast(baseNode); + MIRSymbol *mirSymbol = mirFunction->GetLocalOrGlobalSymbol(addrofNode->GetStIdx()); + addrOfSymList.insert(mirSymbol); + } + for (size_t j = 0; j < baseNode->NumOpnds(); j++) { + baseNodeStack.push(baseNode->Opnd(j)); + } + } + + MIRSymbolTable *symbolTable = mirFunction->GetSymTab(); + size_t size = symbolTable->GetSymbolTableSize(); + // LabelIdx is the type of uint32_t, we should have a LabelIdx_MAX + CHECK_FATAL(size < UINT32_MAX, "Too large table size"); + for (uint32_t i = 0; i < uint32_t(size); ++i) { + MIRSymbol *symbol = symbolTable->GetSymbolFromStIdx(LabelIdx(i)); + if (symbol == nullptr) { + continue; + } + if (symbol->IsDeleted() || symbol->GetName() == "") { + continue; + } + + for (MIRSymbol *mirSymbol : addrOfSymList) { + if (mirSymbol->GetStIdx() == symbol->GetStIdx()) { + preAnalysis->usedInAddrof.push_back(symbol); + } + } + } + this->result = preAnalysis; + LogInfo::MapleLogger() << "The MEDoVarCheck::PhaseRun ends " << f.GetName() << "\n"; + return true; +} + +PreAnalysis* MEDoVarCheck::GetResult() { + return this->result; +} + +} // namespace maple + +#endif \ No newline at end of file diff --git a/src/mapleall/maple_san/src/asan_stackvar.cpp b/src/mapleall/maple_san/src/asan_stackvar.cpp new file mode 100644 index 0000000000000000000000000000000000000000..87f4d884ec31a76db1594393cee23e74d74f39be --- /dev/null +++ b/src/mapleall/maple_san/src/asan_stackvar.cpp @@ -0,0 +1,684 @@ +// +// Created by wchenbt on 3/4/2021. 
+// +#ifdef ENABLE_MAPLE_SAN + +#include "asan_stackvar.h" + +#include + +#include "san_common.h" +#include + +#define ENABLE_STACK_SIZE_LIMIT false + +namespace maple { + +// Fake stack allocator (asan_fake_stack.h) has 11 size classes +// for every power of 2 from kMinStackMallocSize to kMaxAsanStackMallocSizeClass +static int StackMallocSizeClass(uint64_t LocalStackSize) { + // unblock the stack size limit + if (ENABLE_STACK_SIZE_LIMIT) { + CHECK_FATAL(LocalStackSize <= kMaxStackMallocSize, "Too large stack size."); + } + uint64_t MaxSize = kMinStackMallocSize; + for (int i = 0;; i++, MaxSize *= 2) { + if (LocalStackSize <= MaxSize) { + return i; + } + } +} + +FunctionStackPoisoner::FunctionStackPoisoner(MeFunction &function, AddressSanitizer &asan) + : ASan(asan), + meFunction(&function), + mirFunction(function.GetMirFunc()), + module(mirFunction->GetModule()), + IntptrTy(asan.IntPtrTy), + Mapping(asan.Mapping), + StackAlignment(1 << Mapping.Scale) { + IntptrPtrTy = GlobalTables::GetTypeTable().GetOrCreatePointerType(*IntptrTy, PTY_ptr); + // initialize function arguments symbols + for (size_t i = 0; i < mirFunction->GetFormalCount(); ++i) { + const MIRSymbol *symbolPtr = mirFunction->GetFormal(i); + callArgSymbols.insert(symbolPtr); + callArgSymbolNames.insert(symbolPtr->GetName()); + } + initializeCallbacks(*module); +} + +void FunctionStackPoisoner::copyToShadow(const std::vector ShadowMask, const std::vector ShadowBytes, + MIRBuilder *mirBuilder, BaseNode *ShadowBase, StmtNode *InsBefore) { + copyToShadow(ShadowMask, ShadowBytes, 0, ShadowMask.size(), mirBuilder, ShadowBase, InsBefore); +} + +void FunctionStackPoisoner::copyToShadow(const std::vector ShadowMask, const std::vector ShadowBytes, + size_t Begin, size_t End, MIRBuilder *mirBuilder, BaseNode *ShadowBase, + StmtNode *InsBefore) { + assert(ShadowMask.size() == ShadowBytes.size()); + size_t Done = Begin; + const size_t threshold = 64; + for (size_t i = Begin, j = Begin + 1; i < End; i = j++) { + if 
(!ShadowMask[i]) { + assert(!ShadowBytes[i]); + continue; + } + uint8_t Val = ShadowBytes[i]; + if (!AsanSetShadowFunc[Val]) { + continue; + } + // Skip same values. + while (j < End && ShadowMask[j] && Val == ShadowBytes[j]) { + ++j; + } + + if (j - i >= threshold) { + copyToShadowInline(ShadowMask, ShadowBytes, Done, i, mirBuilder, ShadowBase, InsBefore); + Done = j; + } + } + + copyToShadowInline(ShadowMask, ShadowBytes, Done, End, mirBuilder, ShadowBase, InsBefore); +} + +void FunctionStackPoisoner::copyToShadowInline(const std::vector ShadowMask, const std::vector ShadowBytes, + size_t Begin, size_t End, MIRBuilder *mirBuilder, BaseNode *ShadowBase, + StmtNode *InsBefore) { + if (Begin >= End) { + return; + } + + const size_t LargestStoreSizeInBytes = std::min(sizeof(uint64_t), ASan.LongSize / 8); + + for (size_t i = Begin; i < End;) { + if (!ShadowMask[i]) { + assert(!ShadowBytes[i]); + ++i; + continue; + } + + size_t StoreSizeInBytes = LargestStoreSizeInBytes; + // Fit store size into the range. + while (StoreSizeInBytes > End - i) { + StoreSizeInBytes /= 2; + } + + // Minimize store size by trimming trailing zeros. 
+ for (size_t j = StoreSizeInBytes - 1; j && !ShadowMask[i + j]; --j) { + while (j <= StoreSizeInBytes / 2) { + StoreSizeInBytes /= 2; + } + } + + uint64_t Val = 0; + for (size_t j = 0; j < StoreSizeInBytes; j++) { + Val |= uint64_t(ShadowBytes[i + j]) << (8 * j); + } + + BinaryNode *Ptr = mirBuilder->CreateExprBinary(OP_add, *IntptrTy, ShadowBase, + mirBuilder->CreateIntConst(i, IntptrTy->GetPrimType())); + PrimType primType; + switch (StoreSizeInBytes * 8) { + case 8: + primType = PTY_i8; + break; + case 16: + primType = PTY_i16; + break; + case 32: + primType = PTY_i32; + break; + case 64: + primType = PTY_i64; + break; + default: { + primType = PTY_unknown; + } + } + ConstvalNode *Poison = mirBuilder->CreateIntConst(Val, primType); + MIRType *ptrType = GlobalTables::GetTypeTable().GetOrCreatePointerType( + GlobalTables::GetTypeTable().GetPrimType(primType)->GetTypeIndex()); + IassignNode *iassignNode = mirBuilder->CreateStmtIassign(*ptrType, 0, Ptr, Poison); + iassignNode->InsertAfterThis(*InsBefore); + i += StoreSizeInBytes; + } +} + +void FunctionStackPoisoner::initializeCallbacks(const MIRModule &M) { + MIRBuilder *mirBuilder = M.GetMIRBuilder(); +#ifdef ENABLERBTREE +#else + for (size_t Val : {0x00, 0xf1, 0xf2, 0xf3, 0xf5, 0xf8}) { + std::ostringstream Name; + Name << kAsanSetShadowPrefix; + Name << std::setw(2) << std::setfill('0') << std::hex << Val; + AsanSetShadowFunc[Val] = getOrInsertFunction(mirBuilder, Name.str().c_str(), GlobalTables::GetTypeTable().GetVoid(), + {IntptrTy, IntptrTy}); + } +#endif + AsanAllocaPoisonFunc = + getOrInsertFunction(mirBuilder, kAsanAllocaPoison, GlobalTables::GetTypeTable().GetVoid(), {IntptrTy, IntptrTy}); + AsanAllocasUnpoisonFunc = getOrInsertFunction(mirBuilder, kAsanAllocasUnpoison, + GlobalTables::GetTypeTable().GetVoid(), {IntptrTy, IntptrTy}); +} + +std::set FunctionStackPoisoner::GetStackVarReferedByCallassigned() { + std::set retOfCallassigned; + MIRSymbolTable *symbolTable = mirFunction->GetSymTab(); + for 
(StmtNode &stmt : mirFunction->GetBody()->GetStmtNodes()) { + if (stmt.GetOpCode() == OP_callassigned) { + CallNode *tmp = dynamic_cast(&stmt); + const CallReturnVector *retVecPtr = tmp->GetCallReturnVector(); + // get the return value being refered by a function call + for (const CallReturnPair &retPair : *retVecPtr) { + MIRSymbol *symbol = symbolTable->GetSymbolFromStIdx(retPair.first.Idx()); + retOfCallassigned.insert(symbol); + } + } + } + return retOfCallassigned; +} + +void FunctionStackPoisoner::collectLocalVariablesWithoutAlloca() { + // Collect all variables refered by return statement of callassigned + std::set retOfCallassigned = GetStackVarReferedByCallassigned(); + MIRSymbolTable *symbolTable = mirFunction->GetSymTab(); + size_t size = symbolTable->GetSymbolTableSize(); + CHECK_FATAL(size < UINT32_MAX, "Too large symbol table size."); + for (uint32_t i = 0; i < uint32_t(size); ++i) { + MIRSymbol *symbol = symbolTable->GetSymbolFromStIdx(LabelIdx(i)); + if (symbol == nullptr) { + continue; + } + if (symbol->IsDeleted() || symbol->GetName() == "") { + continue; + } + if (ASan.isInterestingSymbol(*symbol)) { + if (StringUtils::StartsWith(symbol->GetName(), "asan_")) { + continue; + } + // we skip symbols being refered by callassigned as a return value + if (retOfCallassigned.find(symbol) != retOfCallassigned.end()) { + continue; + } + StackAlignment = std::max(StackAlignment, symbol->GetType()->GetAlign()); + ASanStackVariableDescription description = {symbol->GetName(), + symbol->GetType()->GetSize(), + 0, + symbol->GetType()->GetAlign(), + symbol, + nullptr, + 0, + symbol->GetSrcPosition().LineNum()}; + stackVariableDesc.push_back(description); + } + } +} + +void FunctionStackPoisoner::collectDescFromUnaryStmtNode(UnaryStmtNode &assignNode) { + BaseNode *baseNode = assignNode.GetRHS(); + while (baseNode) { + UnaryNode *rhs = dynamic_cast(baseNode); + if (rhs == nullptr) { + return; + } + if (rhs->GetOpCode() == OP_alloca && 
ASan.isInterestingAlloca(*rhs)) { + ConstvalNode *constvalNode = dynamic_cast(rhs->Opnd(0)); + if (constvalNode && isInFirstBlock(&assignNode)) { + // static alloca + MIRIntConst *mirConst = dynamic_cast(constvalNode->GetConstVal()); + ASanStackVariableDescription description = { + "", static_cast(mirConst->GetValue().GetZXTValue()), + 0, 0, + nullptr, &assignNode, + 0, assignNode.GetSrcPos().LineNum()}; + stackVariableDesc.push_back(description); + } else { + // dynamic alloca + ASanDynaVariableDescription description = {"", rhs->Opnd(0), &assignNode, 0, assignNode.GetSrcPos().LineNum()}; + dynamicAllocaDesc.push_back(description); + } + } + baseNode = rhs->Opnd(0); + } +} + +void FunctionStackPoisoner::collectLocalVariablesWithAlloca() { + for (StmtNode &stmt : mirFunction->GetBody()->GetStmtNodes()) { + if (stmt.GetOpCode() == OP_regassign || stmt.GetOpCode() == OP_dassign) { + UnaryStmtNode *assignNode = dynamic_cast(&stmt); + CHECK_FATAL(assignNode != nullptr, "Node with OP_regassign or OP_dassign is not UnaryStmtNode."); + collectDescFromUnaryStmtNode(*assignNode); + } + } +} + +bool FunctionStackPoisoner::runOnFunction() { + // Collect alloca, ret, etc. 
+ for (StmtNode &stmt : mirFunction->GetBody()->GetStmtNodes()) { + if (stmt.GetOpCode() == OP_return) { + RetVec.push_back(&stmt); + } + } + // Collect local variable + // initialize stackVariableDesc and dynamicAllocaDesc + collectLocalVariablesWithoutAlloca(); + collectLocalVariablesWithAlloca(); + // ignore variables already been used for alloca + auto iter = stackVariableDesc.begin(); + while (iter != stackVariableDesc.end()) { + if (iter->Symbol != nullptr && isUsedInAlloca[iter->Symbol]) { + iter = stackVariableDesc.erase(iter); + } else { + ++iter; + } + } + if (stackVariableDesc.empty() && dynamicAllocaDesc.empty()) { + return false; + } + + if (!dynamicAllocaDesc.empty()) { + createDynamicAllocasInitStorage(); + for (auto &AI : dynamicAllocaDesc) { + handleDynamicAllocaCall(&AI); + } + unpoisonDynamicAllocas(); + } + processStackVariable(); + return true; +} + +bool FunctionStackPoisoner::isInFirstBlock(StmtNode *stmtNode) { + while (stmtNode) { + if (stmtNode->IsCondBr()) { + CondGotoNode *condGotoNode = dynamic_cast(stmtNode); + ConstvalNode *constvalNode = dynamic_cast(condGotoNode->Opnd(0)); + if (constvalNode == nullptr) { + // the stmt is after a direct branch stmt, cannot be the stmt in the first block + return false; + } + MIRIntConst *mirIntConst = dynamic_cast(constvalNode->GetConstVal()); + if (!(mirIntConst && mirIntConst->GetValue() == 1)) { + return false; + } + } + stmtNode = stmtNode->GetPrev(); + } + return true; +} + +void FunctionStackPoisoner::createDynamicAllocasInitStorage() { + MIRBuilder *mirBuilder = module->GetMIRBuilder(); + DynamicAllocaLayout = getOrCreateSymbol(mirBuilder, IntptrTy->GetTypeIndex(), "asan_dynamic_alloca", kStVar, kScAuto, + mirFunction, kScopeLocal); + DynamicAllocaLayout->GetAttrs().SetAlign(32); + DassignNode *dassignNode = mirBuilder->CreateStmtDassign(DynamicAllocaLayout->GetStIdx(), 0, + mirBuilder->CreateIntConst(0, IntptrTy->GetPrimType())); + 
mirFunction->GetBody()->InsertBefore(mirFunction->GetBody()->GetFirst(), dassignNode); +} + +void FunctionStackPoisoner::unpoisonDynamicAllocasBeforeInst(StmtNode *InstBefore) { + MIRBuilder *mirBuilder = module->GetMIRBuilder(); + MapleVector args(mirBuilder->GetCurrentFuncCodeMpAllocator()->Adapter()); + args.emplace_back(mirBuilder->CreateDread(*DynamicAllocaLayout, IntptrTy->GetPrimType())); + args.emplace_back(mirBuilder->CreateAddrof(*DynamicAllocaLayout, PTY_u64)); + + CallNode *callNode = mirBuilder->CreateStmtCall(AsanAllocasUnpoisonFunc->GetPuidx(), args); + callNode->InsertAfterThis(*InstBefore); +} + +// Unpoison dynamic allocas redzones. +void FunctionStackPoisoner::unpoisonDynamicAllocas() { + for (auto &Ret : RetVec) { + unpoisonDynamicAllocasBeforeInst(Ret); + } +} + +void FunctionStackPoisoner::handleDynamicAllocaCall(ASanDynaVariableDescription *AI) { + MIRBuilder *mirBuilder = module->GetMIRBuilder(); + const unsigned Align = std::max(kAllocaRzSize, 1U); + const uint64_t AllocaRedzoneMask = kAllocaRzSize - 1U; + + ConstvalNode *Zero = mirBuilder->CreateIntConst(0, IntptrTy->GetPrimType()); + ConstvalNode *AllocaRzSize = mirBuilder->CreateIntConst(kAllocaRzSize, IntptrTy->GetPrimType()); + ConstvalNode *AllocaRzMask = mirBuilder->CreateIntConst(AllocaRedzoneMask, IntptrTy->GetPrimType()); + + BaseNode *NewSize; + if (AI->Size->GetOpCode() == OP_constval) { + ConstvalNode *constvalNode = dynamic_cast(AI->Size); + MIRIntConst *intConst = dynamic_cast(constvalNode->GetConstVal()); + uint64_t PartialSize = uint64_t(intConst->GetValue().GetExtValue()) & AllocaRedzoneMask; + uint64_t MisAlign = kAllocaRzSize - PartialSize; + uint64_t PartialPadding = MisAlign; + if (uint64_t(kAllocaRzSize) == MisAlign) { + PartialPadding = 0; + } + uint64_t AdditionalChunkSize = Align + kAllocaRzSize + PartialPadding; + NewSize = + mirBuilder->CreateIntConst(AdditionalChunkSize + intConst->GetValue().GetExtValue(), IntptrTy->GetPrimType()); + } else { + BaseNode 
*OldSize = AI->Size; + BinaryNode *PartialSize = mirBuilder->CreateExprBinary(OP_band, *IntptrTy, OldSize, AllocaRzMask); + BaseNode *Misalign = mirBuilder->CreateExprBinary(OP_sub, *IntptrTy, AllocaRzSize, PartialSize); + + MIRSymbol *misAlignSym = getOrCreateSymbol(mirBuilder, IntptrTy->GetTypeIndex(), "asan_misAlign", kStVar, kScAuto, + mirFunction, kScopeLocal); + DassignNode *dassignNode = mirBuilder->CreateStmtDassign(misAlignSym->GetStIdx(), 0, Misalign); + dassignNode->InsertAfterThis(*AI->AllocaInst); + + Misalign = mirBuilder->CreateDread(*misAlignSym, IntptrTy->GetPrimType()); + BinaryNode *Cond = mirBuilder->CreateExprCompare(OP_ne, *IntptrTy, *IntptrTy, Misalign, AllocaRzSize); + TernaryNode *PartialPadding = mirBuilder->CreateExprTernary(OP_select, *IntptrTy, Cond, Misalign, Zero); + // Align is added to locate left redzone, PartialPadding for possible + // partial redzone and kAllocaRzSize for right redzone respectively. + BinaryNode *AdditionalChunkSize = mirBuilder->CreateExprBinary( + OP_add, *IntptrTy, mirBuilder->CreateIntConst(Align + kAllocaRzSize, IntptrTy->GetPrimType()), PartialPadding); + + NewSize = mirBuilder->CreateExprBinary(OP_add, *IntptrTy, OldSize, AdditionalChunkSize); + } + // Insert new alloca with new NewSize and Align params. 
+ MIRType *ptrType = GlobalTables::GetTypeTable().GetOrCreatePointerType(*GlobalTables::GetTypeTable().GetInt8()); + MIRSymbol *tmpAlloca = + getOrCreateSymbol(mirBuilder, ptrType->GetTypeIndex(), "asan_dyn_tmp", kStVar, kScAuto, mirFunction, kScopeLocal); + UnaryNode *NewAlloca = mirBuilder->CreateExprUnary(OP_alloca, *GlobalTables::GetTypeTable().GetAddr64(), NewSize); + + DassignNode *dassignNode = mirBuilder->CreateStmtDassign(tmpAlloca->GetStIdx(), 0, NewAlloca); + dassignNode->InsertAfterThis(*AI->AllocaInst); + assert(AI->AllocaInst->Opnd(0)->GetOpCode() == OP_alloca); + + BinaryNode *NewAddress = mirBuilder->CreateExprBinary(OP_add, *GlobalTables::GetTypeTable().GetPrimType(PTY_u64), + mirBuilder->CreateDread(*tmpAlloca, PTY_a64), + mirBuilder->CreateIntConst(Align, PTY_u64)); + AI->AllocaInst->SetOpnd(NewAddress, 0); + + MapleVector args(mirBuilder->GetCurrentFuncCodeMpAllocator()->Adapter()); + args.emplace_back(NewAddress); + args.emplace_back(AI->Size); + CallNode *callNode = mirBuilder->CreateStmtCall(AsanAllocaPoisonFunc->GetPuidx(), args); + callNode->InsertAfterThis(*AI->AllocaInst); + + // Insert __asan_alloca_poison call for new created alloca. 
+ dassignNode = mirBuilder->CreateStmtDassign( + DynamicAllocaLayout->GetStIdx(), 0, + mirBuilder->CreateExprTypeCvt(OP_cvt, PTY_i64, PTY_u64, *mirBuilder->CreateDread(*tmpAlloca, PTY_a64))); + dassignNode->InsertAfterThis(*AI->AllocaInst); +} + +MIRSymbol *FunctionStackPoisoner::createAllocaForLayout(StmtNode *insBefore, MIRBuilder *mirBuilder, + const ASanStackFrameLayout &L) { + CHECK_FATAL(L.FrameSize < UINT32_MAX, "Too large frame size."); + MIRArrayType *arrayType = + GlobalTables::GetTypeTable().GetOrCreateArrayType(*GlobalTables::GetTypeTable().GetInt8(), uint32_t(L.FrameSize)); + MIRSymbol *tmp = + getOrCreateSymbol(mirBuilder, arrayType->GetTypeIndex(), "asan_tmp", kStVar, kScAuto, mirFunction, kScopeLocal); + size_t realignStack = 32; + assert((realignStack & (realignStack - 1)) == 0); + size_t frameAlignment = std::max(L.FrameAlignment, realignStack); + CHECK_FATAL(frameAlignment < UINT32_MAX, "Too large frameAlignment."); + tmp->GetAttrs().SetAlign(uint32_t(frameAlignment)); + MIRType *ptrType = GlobalTables::GetTypeTable().GetOrCreatePointerType(*GlobalTables::GetTypeTable().GetInt8()); + MIRSymbol *alloca = + getOrCreateSymbol(mirBuilder, ptrType->GetTypeIndex(), "asan_alloca", kStVar, kScAuto, mirFunction, kScopeLocal); + DassignNode *dassignNode = + mirBuilder->CreateStmtDassign(alloca->GetStIdx(), 0, mirBuilder->CreateAddrof(*tmp, PTY_u64)); + mirFunction->GetBody()->InsertBefore(insBefore, dassignNode); + return alloca; +} + +bool FunctionStackPoisoner::isFuncCallArg(const MIRSymbol *const symbolPtr) const { + auto iter = callArgSymbols.find(symbolPtr); + return iter != callArgSymbols.end(); +} + +bool FunctionStackPoisoner::isFuncCallArg(const std::string symbolName) const { + auto iter = callArgSymbolNames.find(symbolName); + return iter != callArgSymbolNames.end(); +} + +void FunctionStackPoisoner::processStackVariable() { + if (stackVariableDesc.empty()) { + return; + } + StmtNode *insBefore = mirFunction->GetBody()->GetFirst(); + size_t 
granularity = 1ULL << Mapping.Scale; + size_t minHeaderSize = std::max(ASan.LongSize / 2, granularity); + const ASanStackFrameLayout &L = ComputeASanStackFrameLayout(stackVariableDesc, granularity, minHeaderSize); + auto descriptionString = ComputeASanStackFrameDescription(stackVariableDesc); + LogInfo::MapleLogger() << descriptionString << " --- " << L.FrameSize << "\n"; + + bool doStackMalloc = true; + uint64_t localStackSize = L.FrameSize; + if (ENABLE_STACK_SIZE_LIMIT) { + doStackMalloc = localStackSize <= kMaxStackMallocSize; + int stackMallocIdx = StackMallocSizeClass(localStackSize); + CHECK_FATAL(stackMallocIdx <= kMaxAsanStackMallocSizeClass, "Too large stackMallocIdx"); + } + doStackMalloc = (!HasNonEmptyInlineAsm) && (!HasReturnsTwiceCall) && doStackMalloc; + + MIRBuilder *mirBuilder = module->GetMIRBuilder(); + MIRSymbol *allocaValue = createAllocaForLayout(insBefore, mirBuilder, L); + + for (size_t i = 0; i < stackVariableDesc.size(); i++) { + ASanStackVariableDescription desc = stackVariableDesc.at(i); + if (desc.Symbol != nullptr) { + MIRSymbol *localVar = desc.Symbol; + BinaryNode *addExpr = + mirBuilder->CreateExprBinary(OP_add, *IntptrTy, + mirBuilder->CreateExprTypeCvt(OP_cvt, IntptrTy->GetPrimType(), PTY_u64, + *mirBuilder->CreateDread(*allocaValue, PTY_a64)), + mirBuilder->CreateIntConst(desc.Offset, IntptrTy->GetPrimType())); + // change the variable to a pointer with name asan_ + MIRType *localVarPtr = GlobalTables::GetTypeTable().GetOrCreatePointerType(desc.Symbol->GetTyIdx()); + MIRSymbol *newLocalVar = getOrCreateSymbol(mirBuilder, localVarPtr->GetTypeIndex(), "asan_" + localVar->GetName(), + kStVar, kScAuto, mirFunction, kScopeLocal); + newLocalVar->SetSrcPosition(localVar->GetSrcPosition()); + // initialize the field of the Var by dassign + DassignNode *dassignNode = mirBuilder->CreateStmtDassign(newLocalVar->GetStIdx(), 0, addExpr); + dassignNode->InsertAfterThis(*insBefore); + // replace the Var being referenced + 
replaceAllUsesWith(localVar, newLocalVar); + /* The stack Var could be a function call parameter + When the stack Var is initialized by the calling convention, its shadow memory can be + initialized already, or it has value so that we need to assign the source value to its field + NOTE: this must be done after replaceAllUsesWith; otherwise, the initialization could be falsely + replaced*/ + if (isFuncCallArg(localVar)) { + // // dread asan_ PTY_a64 + // BaseNode *asanAddrExpr = mirBuilder->CreateDread(*newLocalVar, PTY_a64); + // // dread + // MIRType *argVarType = localVar->GetType(); + // BaseNode *argVarValue = mirBuilder->CreateDread(*localVar, argVarType->GetPrimType()); + // // TODO: need check->since we assign the whole value of an argument, the field should be 0 + // IassignNode *iassignNode = mirBuilder->CreateStmtIassign(*localVarPtr, 0, asanAddrExpr, argVarValue); + // iassignNode->InsertAfterThis(*insBefore); + // // here we call memcpy to parse the argument + // the function should be the same as the upper code + MapleVector args(module->GetMPAllocator().Adapter()); + args.emplace_back(mirBuilder->CreateDread(*newLocalVar, PTY_a64)); + args.emplace_back(mirBuilder->CreateAddrof(*localVar, PTY_u64)); + args.emplace_back(mirBuilder->GetConstUInt64(localVar->GetType()->GetSize())); + IntrinsiccallNode *intrinsiccallNode = mirBuilder->CreateStmtIntrinsicCall(INTRN_C_memcpy, args); + intrinsiccallNode->InsertAfterThis(*insBefore); + } + } + if (desc.AllocaInst != nullptr) { + BinaryNode *addExpr = + mirBuilder->CreateExprBinary(OP_add, *IntptrTy, + mirBuilder->CreateExprTypeCvt(OP_cvt, IntptrTy->GetPrimType(), PTY_u64, + *mirBuilder->CreateDread(*allocaValue, PTY_a64)), + mirBuilder->CreateIntConst(desc.Offset, IntptrTy->GetPrimType())); + if (desc.AllocaInst->GetOpCode() == OP_regassign || desc.AllocaInst->GetOpCode() == OP_dassign) { + UnaryStmtNode *assignNode = dynamic_cast(desc.AllocaInst); + assignNode->SetRHS(addExpr); + } + } + insBefore = 
insBefore->GetNext()->GetPrev(); + } + + // The left-most redzone has enough space for at least 4 pointers. + // Write the Magic value to redzone[0]. + BaseNode *basePlus0 = mirBuilder->CreateDread(*allocaValue, PTY_a64); + IassignNode *basePlus0Store = mirBuilder->CreateStmtIassign( + *IntptrPtrTy, 0, basePlus0, mirBuilder->CreateIntConst(kCurrentStackFrameMagic, IntptrTy->GetPrimType())); + basePlus0Store->InsertAfterThis(*insBefore); + // Write the frame description constant to redzone[1] + BaseNode *basePlus1 = mirBuilder->CreateExprBinary(OP_add, *GlobalTables::GetTypeTable().GetPrimType(PTY_u64), + mirBuilder->CreateDread(*allocaValue, PTY_a64), + mirBuilder->CreateIntConst(ASan.LongSize / 8, PTY_u64)); + + ConststrNode *description = module->CurFuncCodeMemPool()->New( + PTY_a64, GlobalTables::GetUStrTable().GetOrCreateStrIdxFromName(descriptionString)); + + IassignNode *basePlus1Store = mirBuilder->CreateStmtIassign(*IntptrPtrTy, 0, basePlus1, description); + basePlus1Store->InsertAfterThis(*insBefore); + // Write the PC to redzone[2] + BaseNode *basePlus2 = mirBuilder->CreateExprBinary(OP_add, *GlobalTables::GetTypeTable().GetPrimType(PTY_u64), + mirBuilder->CreateDread(*allocaValue, PTY_a64), + mirBuilder->CreateIntConst(2 * ASan.LongSize / 8, PTY_u64)); + AddroffuncNode *addroffuncNode = mirBuilder->CreateExprAddroffunc(mirFunction->GetPuidx()); + addroffuncNode->SetPrimType(PTY_a64); + IassignNode *basePlus2Store = mirBuilder->CreateStmtIassign(*IntptrPtrTy, 0, basePlus2, addroffuncNode); + basePlus2Store->InsertAfterThis(*insBefore); + const auto &shadowAfterScope = GetShadowBytesAfterScope(stackVariableDesc, L); + +#ifdef ENABLERBTREE + MapleVector args(mirBuilder->GetCurrentFuncCodeMpAllocator()->Adapter()); + args.emplace_back(mirBuilder->CreateDread(*allocaValue, PTY_a64)); + args.emplace_back(mirBuilder->CreateIntConst(shadowAfterScope.size() * L.Granularity, ASan.IntPtrPrim)); + auto callNode = 
mirBuilder->CreateStmtCall(ASan.AsanRBTStackInsert->GetPuidx(), args); + callNode->InsertAfterThis(*insBefore); + + // Dig holes in redzone for variables + for (auto const &desc : stackVariableDesc) { + // LogInfo::MapleLogger() << "digging hole for " << desc.Name << "\n"; + MapleVector args(mirBuilder->GetCurrentFuncCodeMpAllocator()->Adapter()); + auto redzonePtr = + mirBuilder->CreateExprBinary(OP_add, *ASan.IntPtrTy, mirBuilder->CreateDread(*allocaValue, PTY_a64), + mirBuilder->CreateIntConst(desc.Offset, ASan.IntPtrPrim)); + args.emplace_back(redzonePtr); + args.emplace_back(mirBuilder->CreateIntConst(desc.Size, ASan.IntPtrPrim)); + auto callNode = mirBuilder->CreateStmtCall(ASan.AsanRBTStackDelete->GetPuidx(), args); + callNode->InsertAfterThis(*insBefore); + } + + // (Un)poison the stack before all ret instructions. + for (StmtNode *ret : RetVec) { + MapleVector args(mirBuilder->GetCurrentFuncCodeMpAllocator()->Adapter()); + args.emplace_back(mirBuilder->CreateDread(*allocaValue, PTY_a64)); + args.emplace_back(mirBuilder->CreateIntConst(L.FrameSize, ASan.IntPtrPrim)); + auto callNode = mirBuilder->CreateStmtCall(ASan.AsanRBTStackDelete->GetPuidx(), args); + callNode->InsertAfterThis(*ret); + } +#else + // Get the value of shadow memory + MIRSymbol *shadowBase = getOrCreateSymbol(mirBuilder, IntptrTy->GetTypeIndex(), "asan_shadowBase", kStVar, kScAuto, + mirFunction, kScopeLocal); + DassignNode *dassignNode = mirBuilder->CreateStmtDassign( + *shadowBase, 0, ASan.memToShadow(mirBuilder->CreateDread(*allocaValue, PTY_a64), *mirBuilder)); + dassignNode->InsertAfterThis(*insBefore); + copyToShadow(shadowAfterScope, shadowAfterScope, mirBuilder, + mirBuilder->CreateDread(*shadowBase, shadowBase->GetType()->GetPrimType()), insBefore); + + std::vector shadowClean(shadowAfterScope.size(), 0); + + // (Un)poison the stack before all ret instructions. + for (StmtNode *ret : RetVec) { + // Mark the current frame as retired. 
+ IassignNode *retiredNode = mirBuilder->CreateStmtIassign( + *IntptrPtrTy, 0, basePlus0, mirBuilder->CreateIntConst(kRetiredStackFrameMagic, IntptrTy->GetPrimType())); + retiredNode->InsertAfterThis(*ret); + if (doStackMalloc) { + copyToShadow(shadowAfterScope, shadowClean, mirBuilder, + mirBuilder->CreateDread(*shadowBase, shadowBase->GetType()->GetPrimType()), ret); + } + } +#endif + // We are done. Remove the old unused alloca instructions. + for (ASanStackVariableDescription svd : stackVariableDesc) { + if (svd.Symbol != nullptr) { + svd.Symbol->SetIsDeleted(); + } + } +} + +BaseNode *FunctionStackPoisoner::GetTransformedNode(MIRSymbol *oldVar, MIRSymbol *newVar, BaseNode *baseNode) { + BaseNode *retNode = nullptr; + if (baseNode->GetOpCode() == OP_addrof) { + AddrofNode *addrofNode = dynamic_cast(baseNode); + MIRSymbol *mirSymbol = mirFunction->GetLocalOrGlobalSymbol(addrofNode->GetStIdx()); + if (mirSymbol->GetStIdx() == oldVar->GetStIdx()) { + retNode = module->GetMIRBuilder()->CreateDread(*newVar, PTY_a64); + return retNode; + } + } else if (baseNode->GetOpCode() == OP_dassign) { + DassignNode *dassignNode = dynamic_cast(baseNode); + MIRSymbol *mirSymbol = mirFunction->GetLocalOrGlobalSymbol(dassignNode->GetStIdx()); + if (mirSymbol->GetStIdx() == oldVar->GetStIdx()) { + BaseNode *newRHS = GetTransformedNode(oldVar, newVar, dassignNode->GetRHS()); + StmtNode *newStmtNode = + module->GetMIRBuilder()->CreateStmtIassign(*newVar->GetType(), dassignNode->GetFieldID(), + module->GetMIRBuilder()->CreateDread(*newVar, PTY_a64), newRHS); + retNode = newStmtNode; + return retNode; + } + } else if (baseNode->GetOpCode() == OP_dread) { + DreadNode *dreadNode = dynamic_cast(baseNode); + MIRSymbol *mirSymbol = mirFunction->GetLocalOrGlobalSymbol(dreadNode->GetStIdx()); + if (mirSymbol->GetStIdx() == oldVar->GetStIdx()) { + IreadNode *newStmtNode = module->GetMIRBuilder()->CreateExprIread( + *GlobalTables::GetTypeTable().GetPrimType(dreadNode->GetPrimType()), 
*newVar->GetType(), + dreadNode->GetFieldID(), module->GetMIRBuilder()->CreateDread(*newVar, PTY_a64)); + retNode = newStmtNode; + return retNode; + } + } else if (baseNode->GetOpCode() == OP_dassignoff) { + DassignoffNode *dassignoffNode = dynamic_cast(baseNode); + MIRSymbol *mirSymbol = mirFunction->GetLocalOrGlobalSymbol(dassignoffNode->GetStIdx()); + if (mirSymbol->GetStIdx() == oldVar->GetStIdx()) { + BaseNode *newRHS = GetTransformedNode(oldVar, newVar, dassignoffNode->GetRHS()); + StmtNode *newStmtNode = + module->GetMIRBuilder()->CreateStmtIassignoff(newVar->GetType()->GetPrimType(), + dassignoffNode->GetOffset(), + module->GetMIRBuilder()->CreateDread(*newVar, PTY_a64), + newRHS); + retNode = newStmtNode; + return retNode; + } + } + + // all other process must run following code to make sure every child has been visited + for (size_t j = 0; j < baseNode->NumOpnds(); j++) { + BaseNode *tmpNode = GetTransformedNode(oldVar, newVar, baseNode->Opnd(j)); + if (tmpNode != baseNode->Opnd(j)) { + baseNode->SetOpnd(tmpNode, j); + } + } + retNode = baseNode; + CHECK_FATAL(retNode != nullptr, "No return node."); + return retNode; +} + +void FunctionStackPoisoner::replaceAllUsesWith(MIRSymbol *oldVar, MIRSymbol *newVar) { + if (mirFunction->GetBody() == nullptr) { + return; + } + CHECK_FATAL(oldVar->GetTyIdx() == dynamic_cast(newVar->GetType())->GetPointedTyIdx(), + "Replace Var SYmbol with different PointedTyIdx"); + std::vector> toReplace; + for (StmtNode &stmt : mirFunction->GetBody()->GetStmtNodes()) { + BaseNode *newStmt = GetTransformedNode(oldVar, newVar, &stmt); + if (newStmt != dynamic_cast(&stmt)) { + StmtNode *stmt2ptr = dynamic_cast(newStmt); + CHECK_FATAL(stmt2ptr != nullptr, "Get a stmt2 without StmtNode type"); + stmt2ptr->SetSrcPos(stmt.GetSrcPos()); + toReplace.emplace_back(std::pair(&stmt, stmt2ptr)); + } + } + + for (auto ss : toReplace) { + mirFunction->GetBody()->ReplaceStmt1WithStmt2(ss.first, ss.second); + } +} +} // namespace maple + +#endif \ 
No newline at end of file diff --git a/src/mapleall/maple_san/src/san_common.cpp b/src/mapleall/maple_san/src/san_common.cpp new file mode 100644 index 0000000000000000000000000000000000000000..29e626e3da259c864e133c2478b5fed4cec9ab7c --- /dev/null +++ b/src/mapleall/maple_san/src/san_common.cpp @@ -0,0 +1,795 @@ +// +// Created by wchenbt on 5/4/2021. +// +#ifdef ENABLE_MAPLE_SAN + +#include "san_common.h" + +#include "asan_interfaces.h" +#include "me_function.h" +#include "me_ir.h" +#include "mir_builder.h" +#include "string_utils.h" + +namespace maple { + +void appendToGlobalCtors(const MIRModule &mirModule, const MIRFunction *func) { + MIRBuilder *mirBuilder = mirModule.GetMIRBuilder(); + MIRFunction *GlobalCtors = mirBuilder->GetOrCreateFunction("__cxx_global_var_init", TyIdx(PTY_void)); + MapleVector args(mirBuilder->GetCurrentFuncCodeMpAllocator()->Adapter()); + CallNode *callNode = mirBuilder->CreateStmtCall(func->GetPuidx(), args); + GlobalCtors->GetBody()->AddStatement(callNode); +} + +void appendToGlobalDtors(const MIRModule &mirModule, const MIRFunction *func) { + MIRBuilder *mirBuilder = mirModule.GetMIRBuilder(); + MIRFunction *GlobalCtors = mirBuilder->GetOrCreateFunction("__cxx_global_var_fini", TyIdx(PTY_void)); + MapleVector args(mirBuilder->GetCurrentFuncCodeMpAllocator()->Adapter()); + CallNode *callNode = mirBuilder->CreateStmtCall(func->GetPuidx(), args); + GlobalCtors->GetBody()->AddStatement(callNode); +} + +MIRFunction *getOrInsertFunction(MIRBuilder *mirBuilder, const char *name, MIRType *retType, + std::vector argTypes) { + GStrIdx strIdx = GlobalTables::GetStrTable().GetStrIdxFromName(name); + MIRFunction *func = mirBuilder->GetOrCreateFunction(name, retType->GetTypeIndex()); + + if (strIdx != 0u) { + return func; + } + + func->AllocSymTab(); + + /* use void* for PTY_dynany */ + if (retType->GetPrimType() == PTY_dynany) { + retType = GlobalTables::GetTypeTable().GetPtr(); + } + + std::vector formals; + for (uint32 j = 0; j < 
argTypes.size(); ++j) { + MIRType *argTy = argTypes.at(j); + /* use void* for PTY_dynany */ + if (argTy->GetPrimType() == PTY_dynany) { + argTy = GlobalTables::GetTypeTable().GetPtr(); + } + MIRSymbol *argSt = func->GetSymTab()->CreateSymbol(kScopeLocal); + const uint32 bufSize = 18; + char buf[bufSize] = {'\0'}; + int eNum = sprintf_s(buf, bufSize - 1, "p%u", j); + if (eNum == -1) { + FATAL(kLncFatal, "sprintf_s failed"); + } + std::string strBuf(buf); + argSt->SetNameStrIdx(mirBuilder->GetOrCreateStringIndex(strBuf)); + argSt->SetTyIdx(argTy->GetTypeIndex()); + argSt->SetStorageClass(kScFormal); + argSt->SetSKind(kStVar); + func->GetSymTab()->AddToStringSymbolMap(*argSt); + formals.emplace_back(argSt); + } + func->SetAttr(FuncAttrKind::FUNCATTR_public); + func->SetAttr(FuncAttrKind::FUNCATTR_extern); + func->UpdateFuncTypeAndFormalsAndReturnType(formals, retType->GetTypeIndex(), false); + return func; +} + +/// Create a global describing a source location. +MIRAddrofConst *createSourceLocConst(MIRModule &mirModule, MIRSymbol *Var, PrimType primType) { + MIRStrConst *moduleName = createStringConst(mirModule, mirModule.GetFileName(), PTY_a64); + MIRConst *LocData[] = { + moduleName, + GlobalTables::GetIntConstTable().GetOrCreateIntConst(Var->GetSrcPosition().LineNum(), + *GlobalTables::GetTypeTable().GetInt32()), + GlobalTables::GetIntConstTable().GetOrCreateIntConst(Var->GetSrcPosition().Column(), + *GlobalTables::GetTypeTable().GetInt32()), + }; + // Create struct type + MIRStructType LocStruct(kTypeStruct); + GlobalTables::GetTypeTable().AddFieldToStructType(LocStruct, "module_name", + *GlobalTables::GetTypeTable().GetTypeFromTyIdx(TyIdx(primType))); + GlobalTables::GetTypeTable().AddFieldToStructType(LocStruct, "line", *GlobalTables::GetTypeTable().GetInt32()); + GlobalTables::GetTypeTable().AddFieldToStructType(LocStruct, "column", *GlobalTables::GetTypeTable().GetInt32()); + // Create initial value + MIRAggConst *LocStructConst = 
mirModule.GetMemPool()->New(mirModule, LocStruct); + // Initialize the field orig + for (uint32_t i = 0; i < 3; i++) { + LocStructConst->AddItem(LocData[i], i + 1); + } + // Create a new symbol, MIRStructType is a subclass of MIRType + TyIdx LocStructTy = GlobalTables::GetTypeTable().GetOrCreateMIRType(&LocStruct); + MIRSymbol *LocStructSym = mirModule.GetMIRBuilder()->CreateSymbol(LocStructTy, Var->GetName() + "_Loc", kStConst, + kScGlobal, nullptr, kScopeGlobal); + LocStructSym->SetKonst(LocStructConst); + return createAddrofConst(mirModule, LocStructSym, primType); +} + +MIRAddrofConst *createAddrofConst(const MIRModule &mirModule, const MIRSymbol *mirSymbol, PrimType primType) { + AddrofNode *addrofNode = mirModule.GetMIRBuilder()->CreateAddrof(*mirSymbol); + MIRAddrofConst *mirAddrofConst = + mirModule.GetMemPool()->New(addrofNode->GetStIdx(), addrofNode->GetFieldID(), + *GlobalTables::GetTypeTable().GetTypeFromTyIdx(TyIdx(primType))); + return mirAddrofConst; +} + +// Create a constant for Str so that we can pass it to the run-time lib. 
+MIRStrConst *createStringConst(const MIRModule &mirModule, const std::basic_string& Str, PrimType primType) { + MIRStrConst *strConst = + mirModule.GetMemPool()->New(GlobalTables::GetUStrTable().GetOrCreateStrIdxFromName(Str), + *GlobalTables::GetTypeTable().GetTypeFromTyIdx(TyIdx(primType))); + + return strConst; +} + +bool isTypeSized(MIRType *type) { + if (type->GetKind() == kTypeScalar || type->GetKind() == kTypePointer || type->GetKind() == kTypeBitField) { + return true; + } + if (type->GetKind() != kTypeStruct && type->GetKind() != kTypeStructIncomplete && type->GetKind() != kTypeArray && + type->GetKind() != kTypeFArray && type->GetKind() != kTypeUnion) { + return false; + } + if (type->GetKind() == kTypeArray) { + MIRArrayType *arrayType = dynamic_cast(type); + if (arrayType) { + return isTypeSized(arrayType->GetElemType()); + } + } + if (type->GetKind() == kTypeFArray) { + MIRFarrayType *farrayType = dynamic_cast(type); + if (farrayType) { + return isTypeSized(farrayType->GetElemType()); + } + } + if (type->IsStructType()) { + MIRStructType *structType = dynamic_cast(type); + if (structType) { + for (size_t i = 1; i < structType->GetFieldsSize(); i++) { + // FieldID type is int32, there should be a FieldID_MAX + CHECK_FATAL(i < INT32_MAX, "Too large filed size."); + if (!isTypeSized(structType->GetFieldType(FieldID(i)))) { + return false; + } + } + return true; + } + } + return false; +} + +std::vector GetGlobalVaribles(const MIRModule &mirModule) { + std::vector globalVarVec; + for (auto sit = mirModule.GetSymbolDefOrder().begin(); sit != mirModule.GetSymbolDefOrder().end(); ++sit) { + MIRSymbol *s = GlobalTables::GetGsymTable().GetSymbolFromStidx((*sit).Idx()); + CHECK_FATAL(s != nullptr, "nullptr check"); + if (s->IsJavaClassInterface()) { + continue; + } + if (!s->IsDeleted() && !s->GetIsImported() && !s->GetIsImportedDecl()) { + if (s->GetSKind() == kStVar) { + globalVarVec.push_back(s); + } + } + } + return globalVarVec; +} + +int 
computeRedZoneField(MIRType *type) { + int field = 1; + if (type->IsStructType()) { + MIRStructType *structType = dynamic_cast(type); + for (size_t i = 1; i < structType->GetFieldsSize(); i++) { + CHECK_FATAL(i < INT32_MAX, "Too large field size."); + MIRType *subType = structType->GetFieldType(FieldID(i)); + field += computeRedZoneField(subType); + } + } + return field; +} + +size_t TypeSizeToSizeIndex(uint32_t TypeSize) { + uint32_t Val = TypeSize / 8; + if (!Val) { + return std::numeric_limits::digits; + } + if (Val & 0x1) { + return 0; + } + + // Bisection method. + unsigned zeroBits = 0; + uint32_t shift = std::numeric_limits::digits >> 1; + uint32_t mask = std::numeric_limits::max() >> shift; + while (shift) { + if ((Val & mask) == 0) { + Val >>= shift; + zeroBits |= shift; + } + shift >>= 1; + mask >>= shift; + } + assert(zeroBits < kNumberOfAccessSizes); + return zeroBits; +} + +MIRSymbol *getOrCreateSymbol(MIRBuilder *mirBuilder, const TyIdx tyIdx, const std::string &name, MIRSymKind mClass, + MIRStorageClass sClass, MIRFunction *func, uint8 scpID) { + MIRSymbol *st = nullptr; + if (func) { + st = func->GetSymTab()->GetSymbolFromStrIdx(mirBuilder->GetOrCreateStringIndex(name)); + } else { + st = GlobalTables::GetGsymTable().GetSymbolFromStrIdx(mirBuilder->GetOrCreateStringIndex(name)); + } + if (st == nullptr || st->GetTyIdx() != tyIdx) { + return mirBuilder->CreateSymbol(tyIdx, name, mClass, sClass, func, scpID); + } + CHECK_FATAL(mClass == st->GetSKind(), + "trying to create a new symbol that has the same name and GtyIdx. might cause problem"); + CHECK_FATAL(sClass == st->GetStorageClass(), + "trying to create a new symbol that has the same name and tyIdx. might cause problem"); + return st; +} + +// Code for Sanrazor +int SANRAZOR_MODE() { + /* + Sanrazor has several mode + 0. Didn't turn on / default + 1. intrument for coverage + 2. 
collected coverage, analysis and remove sanitzer check + */ + char *env = getenv("SANRAZOR_MODE"); + int SanrazorMode = 0; + if (env) { + SanrazorMode = atoi(env); + if (SanrazorMode >= 3) { + return 0; + } + return SanrazorMode; + } else { + return 0; + } +} + +CallNode *retCallCOV(const MeFunction &func, int bb_id, int stmt_id, int br_true, int type_of_check) { + MIRBuilder *builder = func.GetMIRModule().GetMIRBuilder(); + MIRType *voidType = GlobalTables::GetTypeTable().GetVoid(); + // void __san_cov_trace_pc(char *file_name, int bb_id, int stmt_id,int brtrue,int typecheck) + MIRFunction *__san_cov_trace_pc = getOrInsertFunction(builder, "__san_cov_trace_pc", voidType, {}); + MapleVector argcov(func.GetMIRModule().GetMPAllocator().Adapter()); + UStrIdx strIdx = GlobalTables::GetUStrTable().GetOrCreateStrIdxFromName(func.GetMIRModule().GetFileName()); + ConststrNode *conststr = func.GetMIRModule().GetMemPool()->New(strIdx); + conststr->SetPrimType(PTY_a64); + argcov.emplace_back(conststr); + argcov.emplace_back(builder->GetConstInt(bb_id)); + argcov.emplace_back(builder->GetConstInt(stmt_id)); + argcov.emplace_back(builder->GetConstInt(br_true)); + argcov.emplace_back(builder->GetConstInt(type_of_check)); + CallNode *callcov = builder->CreateStmtCall(__san_cov_trace_pc->GetPuidx(), argcov); + return callcov; +} + +bool isReg_redefined(BaseNode *stmt, std::vector &stmt_reg) { + switch (stmt->GetOpCode()) { + case OP_regread: { + RegreadNode *regread = static_cast(stmt); + stmt_reg.push_back(regread->GetRegIdx()); + break; + } + default: { + for (size_t i = 0; i < stmt->NumOpnds(); i++) { + isReg_redefined(stmt->Opnd(i), stmt_reg); + } + } + } + if (stmt->GetOpCode() == OP_regassign) { + RegassignNode *regAssign = static_cast(stmt); + if (std::count(stmt_reg.begin(), stmt_reg.end(), regAssign->GetRegIdx())) { + // value update + return false; + } else { + return true; + } + } + return false; +} + +template +void print_stack(std::stack &st) { + if (st.empty()) 
return; + T x = st.top(); + LogInfo::MapleLogger() << x << ","; + st.pop(); + print_stack(st); + st.push(x); +} + +template +bool compareVectors(const std::vector& a, const std::vector& b) { + // I am not sure why the original implementation use + // sets to compare the equivalence of two vectors (peformance?) + // Anyway, I think we may not delete the following code now + // if (a.size() != b.size()) + // { + // return false; + // } + // std::sort(a.begin(), a.end()); + // std::sort(b.begin(), b.end()); + // return (a == b); + std::set set_a(a.begin(), a.end()); + std::set set_b(b.begin(), b.end()); + if ((set_a.size() > 0) && (set_b.size() > 0)) { + return (set_a == set_b); + } + return false; +} + +int getIndex(std::vector v, StmtNode *K) { + auto it = find(v.begin(), v.end(), K); + // If element was found + if (it != v.end()) { + int index = it - v.begin(); + return index; + } else { + return -1; + } +} + +StmtNode *retLatest_Regassignment(StmtNode *stmt, int32 register_number) { + StmtNode *ret_stmt = nullptr; + StmtNode *prevStmt = stmt->GetPrev(); + if (prevStmt != nullptr) { + if (prevStmt->GetOpCode() == OP_regassign) { + RegassignNode *regAssign = static_cast(prevStmt); + if (register_number == regAssign->GetRegIdx()) { + return prevStmt; + } else { + ret_stmt = retLatest_Regassignment(prevStmt, register_number); + } + } else if (prevStmt->GetOpCode() == OP_iassign) { + IassignNode *iassign = static_cast(prevStmt); + BaseNode *addr_expr = iassign->Opnd(0); + if (addr_expr->GetOpCode() == OP_iread) { + std::vector dump_reg; + recursion(addr_expr, dump_reg); + for (int32 reg_tmp : dump_reg) { + if (reg_tmp == register_number) { + return prevStmt; + } + } + ret_stmt = retLatest_Regassignment(prevStmt, register_number); + } else if (addr_expr->GetOpCode() == OP_regread) { + RegreadNode *regread = static_cast(addr_expr); + if (register_number == regread->GetRegIdx()) { + return prevStmt; + } else { + ret_stmt = retLatest_Regassignment(prevStmt, 
register_number); + } + } else if (IsCommutative(addr_expr->GetOpCode())) { + /* + 0th stmt: add u64 ( + iread u64 <* <$_TY_IDX111>> 22 (regread ptr %177), + cvt u64 i32 (mul i32 (regread i32 %190, constval i32 2))) + */ + // We just assume its sth like register +/- sth patterns + std::vector dump_reg; + recursion(addr_expr->Opnd(0), dump_reg); + for (int32 reg_tmp : dump_reg) { + if (reg_tmp == register_number) { + return prevStmt; + } + } + ret_stmt = retLatest_Regassignment(prevStmt, register_number); + } else { + ret_stmt = retLatest_Regassignment(prevStmt, register_number); + } + } else { + ret_stmt = retLatest_Regassignment(prevStmt, register_number); + } + } + return ret_stmt; +} + +StmtNode *retLatest_Varassignment(StmtNode *stmt, uint32 var_number) { + StmtNode *ret_stmt = nullptr; + StmtNode *prevStmt = stmt->GetPrev(); + if (prevStmt != nullptr) { + if (prevStmt->GetOpCode() == OP_dassign || prevStmt->GetOpCode() == OP_maydassign) { + DassignNode *dassign = static_cast(prevStmt); + if (var_number == dassign->GetStIdx().Idx()) { + return prevStmt; + } else { + ret_stmt = retLatest_Varassignment(prevStmt, var_number); + } + } else if (prevStmt->GetOpCode() == OP_iassign) { + IassignNode *iassign = static_cast(prevStmt); + BaseNode *addr_expr = iassign->Opnd(0); + if (addr_expr->GetOpCode() == OP_dread) { + // dread i64 %asan_shadowBase + DreadNode *dread = static_cast(addr_expr); + if (var_number == dread->GetStIdx().Idx()) { + return prevStmt; + } else { + ret_stmt = retLatest_Varassignment(prevStmt, var_number); + } + } else { + ret_stmt = retLatest_Varassignment(prevStmt, var_number); + } + } else { + ret_stmt = retLatest_Varassignment(prevStmt, var_number); + } + } + return ret_stmt; +} + +void print_dep(set_check dep) { + LogInfo::MapleLogger() << "\nOpcode: "; + for (auto opcode_tmp : dep.opcode) { + LogInfo::MapleLogger() << int(opcode_tmp) << ","; + } + LogInfo::MapleLogger() << "\n"; +} + +std::set OP_code_blacklist{ + OP_addroffunc, OP_iaddrof, 
OP_addrof, OP_iread, OP_ireadoff, OP_iassign, OP_dread, OP_regread, OP_regassign, + OP_dassign, OP_maydassign, OP_iassignoff, OP_iassignfpoff, + // We only handle the SAN-SAN case + // // check with edit distance ==1 + OP_cvt, + // candidnate: + // OP_band, + // OP_zext, + // OP_ashr, + // // check with edit distance ==2 + // OP_add, + // OP_sub, + OP_constval, + // candidnate: + // OP_add, + // OP_ashr +}; + +std::set OP_code_re_map{OP_eq, OP_ge, OP_gt, OP_le, OP_lt, OP_ne, OP_cmp, OP_cmpl, OP_cmpg}; + +void dep_iassign_expansion(IassignNode *iassign, set_check &dep) { + BaseNode *rhs_expr = iassign->Opnd(1); + if (rhs_expr->GetOpCode() == OP_regread) { + // Case 1. regread u32 %13 + RegreadNode *regread = static_cast(rhs_expr); + dep.register_live.push(regread->GetRegIdx()); + } else if (rhs_expr->GetOpCode() == OP_constval) { + // Case 2. constval i32 0 -> terminal + ConstvalNode *constValNode = static_cast(rhs_expr); + MIRConst *mirConst = constValNode->GetConstVal(); + if (mirConst != nullptr) { + if (mirConst->GetKind() == kConstInt) { + auto *const_to_get_value = safe_cast(mirConst); + dep.const_int64.push_back(const_to_get_value->GetValue()); + } + } + } else if (rhs_expr->GetOpCode() == OP_iread) { + // Case 3. iread agg <* <$_TY_IDX334>> 0 (regread ptr %4) -> Only hold the ptr for deref + std::vector dump_reg; + recursion(rhs_expr, dump_reg); + for (int32 reg_temp : dump_reg) { + dep.register_live.push(reg_temp); + } + } else { + // Case 4. zext u32 8 (lshr u32 (regread u32 %4, constval i32 24)) + // Just assume it can be further expand and treat as a terminal... 
+ // Some of this of compound stmt are register + // assigned by callassigned or function input register + // Although there are some case didn't like this + // We can set it as terminal register to prevent recursively deref + // since it may crash + // A proper SSA likely fix this issue + std::vector dump_reg; + recursion(rhs_expr, dump_reg); + for (int32 reg_temp : dump_reg) { + dep.register_terminal.push_back(reg_temp); + } + } +} + +void dep_constval_expansion(ConstvalNode *constValNode, set_check &dep) { + MIRConst *mirConst = constValNode->GetConstVal(); + // we only trace int64 + // We didn't handle following cases + // kConstFloatConst, MIRFloatConst + // kConstDoubleConst, MIRDoubleConst + if (mirConst != nullptr) { + if (mirConst->GetKind() == kConstInt) { + auto *const_to_get_value = safe_cast(mirConst); + dep.const_int64.push_back(const_to_get_value->GetValue()); + } + } +} + +void dep_dassign_expansion(DassignNode *dassign, set_check &dep, std::map> reg_to_stmt, + std::map> var_to_stmt, MeFunction func) { + std::stack san_blacklist_stack; + bool required_to_clean_san = false; + if (func.GetMIRModule().CurFunction()->GetSymbolTabSize() >= dassign->GetStIdx().Idx()) { + MIRSymbol *var = func.GetMIRModule().CurFunction()->GetSymbolTabItem(dassign->GetStIdx().Idx()); + if (var->GetName().find("asan_length") == 0) { + // dassign %asan_length 0 (band i64 (dread i64 %asan_addr, constval i64 7)) + san_blacklist_stack.push(OP_band); + san_blacklist_stack.push(OP_add); + required_to_clean_san = true; + } else if (var->GetName().find("asan_shadowValue") == 0) { + san_blacklist_stack.push(OP_ashr); + san_blacklist_stack.push(OP_add); + required_to_clean_san = true; + } + } + if (required_to_clean_san) { + if (san_blacklist_stack.size() >= dep.opcode.size()) { + for (size_t opcode_vect_i = 0; opcode_vect_i < dep.opcode.size(); ++opcode_vect_i) { + dep.opcode.pop_back(); + } + } else { + while (!san_blacklist_stack.empty()) { + bool done = false; + uint8 remove_item 
= san_blacklist_stack.top(); + san_blacklist_stack.pop(); + LogInfo::MapleLogger() << remove_item; + for (std::vector::iterator it = dep.opcode.begin(); it != dep.opcode.end(); ++it) { + if (*it == remove_item && !done) { + dep.opcode.erase(it); + done = true; + } + } + } + } + } + for (size_t i = 0; i < dassign->NumOpnds(); i++) { + dep_expansion(dassign->Opnd(i), dep, reg_to_stmt, var_to_stmt, func); + } +} + +void dep_expansion(BaseNode *stmt, set_check &dep, std::map> reg_to_stmt, + std::map> var_to_stmt, const MeFunction& func) { + if ((!OP_code_blacklist.count(stmt->GetOpCode())) && (!OP_code_re_map.count(stmt->GetOpCode()))) { + dep.opcode.push_back(stmt->GetOpCode()); + } else if (OP_code_re_map.count(stmt->GetOpCode())) { + dep.opcode.push_back(uint8(253)); + } + switch (stmt->GetOpCode()) { + case OP_iassign: { + IassignNode *iassign = static_cast(stmt); + dep_iassign_expansion(iassign, dep); + break; + } + case OP_regread: { + RegreadNode *regread = static_cast(stmt); + dep.register_live.push(regread->GetRegIdx()); + break; + } + case OP_constval: { + ConstvalNode *constValNode = static_cast(stmt); + dep_constval_expansion(constValNode, dep); + break; + } + case OP_conststr: { + ConststrNode *conststr = static_cast(stmt); + dep.const_str.push_back(conststr->GetStrIdx()); + break; + } + case OP_conststr16: { + Conststr16Node *conststr16 = static_cast(stmt); + dep.const_str.push_back(conststr16->GetStrIdx()); + break; + } + case OP_dread: { + DreadNode *dread = static_cast(stmt); + dep.var_live.push(dread->GetStIdx().Idx()); + break; + } + case OP_addrof: { + AddrofNode *addrof = static_cast(stmt); + dep.var_live.push(addrof->GetStIdx().Idx()); + break; + } + case OP_addroffunc: { + // We don't handle function pointer + break; + } + case OP_dassign: { + DassignNode *dassign = static_cast(stmt); + dep_dassign_expansion(dassign, dep, reg_to_stmt, var_to_stmt, func); + break; + } + case OP_dassignoff: { + // TODO: + // It is not documented in MAPLE IR. 
+ break; + } + default: { + for (size_t i = 0; i < stmt->NumOpnds(); i++) { + dep_expansion(stmt->Opnd(i), dep, reg_to_stmt, var_to_stmt, func); + } + break; + } + } +} + +set_check commit(set_check old, set_check latest) { + old.opcode.insert(old.opcode.end(), latest.opcode.begin(), latest.opcode.end()); + old.register_terminal.insert(old.register_terminal.end(), latest.register_terminal.begin(), + latest.register_terminal.end()); + old.var_terminal.insert(old.var_terminal.end(), latest.var_terminal.begin(), latest.var_terminal.end()); + old.const_int64.insert(old.const_int64.end(), latest.const_int64.begin(), latest.const_int64.end()); + old.const_str.insert(old.const_str.end(), latest.const_str.begin(), latest.const_str.end()); + old.type_num.insert(old.type_num.end(), latest.type_num.begin(), latest.type_num.end()); + return old; +} + +bool sat_check(const set_check& a, const set_check& b) { + if (compareVectors(a.opcode, b.opcode) + /* + A strict check should also check + compareVectors(a.register_terminal,b.register_terminal) + compareVectors(a.var_terminal,b.var_terminal) + compareVectors(a.const_int64,b.const_int64) + */ + ) { + return true; + } + return false; +} + +void gen_register_dep(StmtNode *stmt, set_check &br_tmp, std::map> reg_to_stmt, + std::map> var_to_stmt, const MeFunction& func) { + while (!br_tmp.register_live.empty()) { + int32_t register_to_check = br_tmp.register_live.top(); + auto iter = reg_to_stmt.find(register_to_check); + br_tmp.register_live.pop(); + if (iter != reg_to_stmt.end()) { + StmtNode *latest_stmt_tmp = retLatest_Regassignment(stmt, register_to_check); + if (latest_stmt_tmp != nullptr) { + set_check br_tmp_go; + dep_expansion(latest_stmt_tmp, br_tmp_go, reg_to_stmt, var_to_stmt, func); + gen_register_dep(latest_stmt_tmp, br_tmp_go, reg_to_stmt, var_to_stmt, func); + br_tmp = commit(br_tmp, br_tmp_go); + } + } else { + br_tmp.register_terminal.push_back(register_to_check); + } + } + while (!br_tmp.var_live.empty()) { + 
uint32_t var_to_check = br_tmp.var_live.top(); + auto iter = var_to_stmt.find(var_to_check); + br_tmp.var_live.pop(); + if (iter != var_to_stmt.end()) { + StmtNode *latest_stmt_tmp = retLatest_Varassignment(stmt, var_to_check); + if (latest_stmt_tmp != nullptr) { + set_check br_tmp_go_var; + dep_expansion(latest_stmt_tmp, br_tmp_go_var, reg_to_stmt, var_to_stmt, func); + gen_register_dep(latest_stmt_tmp, br_tmp_go_var, reg_to_stmt, var_to_stmt, func); + br_tmp = commit(br_tmp, br_tmp_go_var); + } + } else { + br_tmp.var_terminal.push_back(var_to_check); + } + } +} + +bool isVar_redefined(BaseNode *stmt, std::vector &stmt_reg) { + switch (stmt->GetOpCode()) { + case OP_dread: { + DreadNode *dread = static_cast(stmt); + stmt_reg.push_back(dread->GetStIdx().Idx()); + break; + } + default: { + for (size_t i = 0; i < stmt->NumOpnds(); i++) { + isVar_redefined(stmt->Opnd(i), stmt_reg); + } + } + } + if (stmt->GetOpCode() == OP_dassign || stmt->GetOpCode() == OP_maydassign) { + DassignNode *dassign = static_cast(stmt); + if (std::count(stmt_reg.begin(), stmt_reg.end(), dassign->GetStIdx().Idx())) { + // value update + return false; + } else { + return true; + } + } + return false; +} + +void recursion(BaseNode *stmt, std::vector &stmt_reg) { + switch (stmt->GetOpCode()) { + case OP_regread: { + RegreadNode *regread = static_cast(stmt); + stmt_reg.push_back(regread->GetRegIdx()); + break; + } + default: { + for (size_t i = 0; i < stmt->NumOpnds(); i++) { + recursion(stmt->Opnd(i), stmt_reg); + } + } + } +} + +// stmtID to reduciable stmt ID +std::map gen_dynmatch(std::string file_name) { + // read log files and parse the stmtID with br information + FILE *fp; + auto log_name = file_name + ".log"; + // to hold the temp data + std::map ret_log_update; + + fp = fopen(("./" + log_name).c_str(), "r"); + if (fp == nullptr) { + abort(); + } + // 1. 
Parse SAN-SAN + while (true) { + int cur_id; + int rc = fscanf_s(fp, "%d", &cur_id, sizeof(cur_id)); + if (rc != 1) { + break; + } + int stmt_ID_cur = cur_id >> 1; + int br_true_tmp = (stmt_ID_cur << 1) ^ cur_id; + if (ret_log_update.count(stmt_ID_cur)) { + // L:0, R:1 + if (br_true_tmp == 1) { + ret_log_update[stmt_ID_cur].r_ctr += 1; + } else { + ret_log_update[stmt_ID_cur].l_ctr += 1; + } + ret_log_update[stmt_ID_cur].tot_ctr += 1; + } else { + san_struct tmp_san_struct; + tmp_san_struct.stmtID = stmt_ID_cur; + tmp_san_struct.tot_ctr = 1; + tmp_san_struct.l_ctr = 0; + tmp_san_struct.r_ctr = 0; + if (br_true_tmp == 1) { + tmp_san_struct.r_ctr += 1; + } else { + tmp_san_struct.l_ctr += 1; + } + ret_log_update[stmt_ID_cur] = tmp_san_struct; + } + } + fclose(fp); + return ret_log_update; +} + +bool dynamic_sat(const san_struct& a, const san_struct& b, bool SCSC) { + // For SC-UC case, SC must be var a + if (a.tot_ctr == b.tot_ctr) { + if ((a.l_ctr == b.l_ctr) || (a.l_ctr == b.r_ctr)) { + return true; + } else { + return false; + } + } else if (!SCSC) { + // true is 0 + if (a.tot_ctr == b.l_ctr) { + if ((a.tot_ctr == a.l_ctr) || (a.tot_ctr == a.r_ctr)) { + return true; + } else { + return false; + } + } else if (a.tot_ctr == b.r_ctr) { + if ((a.tot_ctr == a.l_ctr) || (a.tot_ctr == a.r_ctr)) { + return true; + } else { + return false; + } + } else { + return false; + } + } else { + return false; + } +} + +} // namespace maple + +#endif \ No newline at end of file diff --git a/src/mapleall/maple_san/src/san_phase_manager.cpp b/src/mapleall/maple_san/src/san_phase_manager.cpp new file mode 100644 index 0000000000000000000000000000000000000000..d55f50bc8c61b2e97e72ce05449ddb1dfed6791a --- /dev/null +++ b/src/mapleall/maple_san/src/san_phase_manager.cpp @@ -0,0 +1,89 @@ +#ifdef ENABLE_MAPLE_SAN + +#include "san_phase_manager.h" +#include "asan_phases.h" +#include "ubsan_phases.h" + + +namespace maple { + +void MEModuleDoAsan::GetAnalysisDependence(maple::AnalysisDep 
&aDep) const { + aDep.SetPreservedAll(); +} + +void MEModuleDoAsan::DoPhasesPopulate(const maple::MIRModule &mirModule) { + #define SAN_PHASE + #include "phases.def" + #undef SAN_PHASE +} + +bool MEModuleDoAsan::FuncLevelRun(MeFunction &meFunc, AnalysisDataManager &serialADM) { + bool changed = false; + for (size_t i = 0; i < phasesSequence.size(); ++i) { + SolveSkipFrom(MeOption::GetSkipFromPhase(), i); + const MaplePhaseInfo *curPhase = MaplePhaseRegister::GetMaplePhaseRegister()->GetPhaseByID(phasesSequence[i]); + if (!IsQuiet()) { + LogInfo::MapleLogger() << "---Run maple_san " << (curPhase->IsAnalysis() ? "analysis" : "transform") + << " Phase [ " << curPhase->PhaseName() << " ]---\n"; + } + if (curPhase->IsAnalysis()) { + changed |= RunAnalysisPhase(*curPhase, serialADM, meFunc); + } else { + changed |= RunTransformPhase(*curPhase, serialADM, meFunc); + } + SolveSkipAfter(MeOption::GetSkipAfterPhase(), i); + } + return changed; +} + +bool MEModuleDoAsan::PhaseRun(maple::MIRModule &m) { + bool changed = false; + // TODO: We have not instrumented global values + // TODO: ModuleAddressSanitizer AsanModule(m); + // TODO: AsanModule.instrumentModule(); + auto &compFuncList = m.GetFunctionList(); + auto admMempool = AllocateMemPoolInPhaseManager("ASAN phase manager's analysis data manager mempool"); + auto *serialADM = GetManagerMemPool()->New(*(admMempool.get())); + ClearAllPhases(); + DoPhasesPopulate(m); + SetQuiet(MeOption::quiet); + size_t i = 0; + for (auto &func : std::as_const(compFuncList)) { + ASSERT_NOT_NULL(func); + ++i; + if (func->IsEmpty()) { + continue; + } + m.SetCurFunction(func); + if (!IsQuiet()) { + LogInfo::MapleLogger() << ">>>>>>>>>>>>>>>>>>>>>>>>>>>>> Sanitize Function < " << func->GetName() + << " id=" << func->GetPuidxOrigin() << " >---\n"; + /* prepare me func */ + auto meFuncMP = std::make_unique(memPoolCtrler, "maple_san per-function mempool"); + auto meFuncStackMP = std::make_unique(memPoolCtrler, ""); + MemPool *versMP = new 
ThreadLocalMemPool(memPoolCtrler, "first verst mempool"); + MeFunction &meFunc = *(meFuncMP->New(&m, func, meFuncMP.get(), *meFuncStackMP, versMP, meInput)); + func->SetMeFunc(&meFunc); + meFunc.PartialInit(); + if (!IsQuiet()) { + LogInfo::MapleLogger() << "---Preparing Function < " << func->GetName() << " > [" << i - 1 << "] ---\n"; + } + meFunc.Prepare(); + changed = FuncLevelRun(meFunc, *serialADM); + meFunc.Release(); + serialADM->EraseAllAnalysisPhase(); + } + } + m.Emit("comb.san.mpl"); + return changed; +} + +MAPLE_TRANSFORM_PHASE_REGISTER_CANSKIP(MEModuleDoAsan, doModuleAsan) + +MAPLE_TRANSFORM_PHASE_REGISTER_CANSKIP(MEDoAsan, doAsan); +MAPLE_ANALYSIS_PHASE_REGISTER(MEDoVarCheck, doAsanVarCheck); +MAPLE_TRANSFORM_PHASE_REGISTER_CANSKIP(MEDoUbsanBound, doUbsanBound); + +} // namespace maple + +#endif \ No newline at end of file diff --git a/src/mapleall/maple_san/src/ubsan_bounds.cpp b/src/mapleall/maple_san/src/ubsan_bounds.cpp new file mode 100644 index 0000000000000000000000000000000000000000..ee09f10be4234e1677a01da1507fc2487424f566 --- /dev/null +++ b/src/mapleall/maple_san/src/ubsan_bounds.cpp @@ -0,0 +1,455 @@ +// +// Created by wchenbt on 9/5/2021. 
+// +#ifdef ENABLE_MAPLE_SAN + +#include "ubsan_bounds.h" +#include "me_function.h" +#include "mir_builder.h" +#include "san_common.h" + + +namespace maple { + + size_t log2_64(size_t value) { + size_t n = 64; + size_t y; + y = value >>32; if (y != 0) { n = n -32; value = y; } + y = value >>16; if (y != 0) { n = n -16; value = y; } + y = value >> 8; if (y != 0) { n = n - 8; value = y; } + y = value >> 4; if (y != 0) { n = n - 4; value = y; } + y = value >> 2; if (y != 0) { n = n - 2; value = y; } + y = value >> 1; if (y != 0) { n = n - 1; } + return 64 - n; + } + + static void getTypeKind(MIRType *mirType, uint16_t *typeKind, uint16_t *typeInfo) { + + *typeKind = 0xffff; + *typeInfo = 0; + + PrimitiveType primType = PrimitiveType(mirType->GetPrimType()); + + if (primType.IsInteger()) { + *typeKind = 0; + *typeInfo = (log2_64(mirType->GetSize() << 3) << 1) | \ + (primType.IsUnsigned() ? 0 : 1); + } else if (primType.IsFloat()) { + *typeKind = 1; + *typeInfo = mirType->GetSize(); + } else { + // Not implemented + mirType->Dump(0, false); + LogInfo::MapleLogger() << "The above mirType has not been implemented yet!\n"; + } + } + + ArrayInfo::ArrayInfo (StmtNode *usedStmt, MIRArrayType *arrayType, ArrayNode *arrayNode) + : usedStmt(usedStmt), arrayType(arrayType) { + for (size_t i = 1; i < arrayNode->NumOpnds(); i++) { + this->offset.push_back(arrayNode->Opnd(i)); + } + for (uint16 i = 0; i < arrayType->GetDim(); i++) { + this->dimensions.push_back(arrayType->GetSizeArrayItem(i)); + } + MIRType *type; + MIRArrayType *element = arrayType; + while (element) { + elemType.push_back(element); + type = element->GetElemType(); + element = dynamic_cast(type); + } + elemType.push_back(type); + } + + size_t ArrayInfo::GetElementSize() { + return this->elemType.back()->GetSize(); + } + + void ArrayInfo::SetNeededSize(size_t size) { + this->neededSize = size; + } + + std::string ArrayInfo::GetArrayTypeName(size_t dim) { + std::string ret = 
std::string(GetPrimTypeName(elemType.back()->GetPrimType())) + " "; + for (size_t i = dim; i < this->dimensions.size(); i++) { + ret += "[" + std::to_string(this->dimensions.at(i)) + "]"; + } + ret = '\'' + ret + "\'"; + return ret.c_str(); + } + + BoundCheck::BoundCheck(MeFunction *func) : func(func) { + mirModule = &(func->GetMIRModule()); + mirBuilder = mirModule->GetMIRBuilder(); + initializeCallbacks(); + } + + std::vector BoundCheck::getArrayInfo(StmtNode *stmtNode) { + std::vector toBeChecked; + + std::stack baseNodeStack; + baseNodeStack.push(stmtNode); + while (!baseNodeStack.empty()) { + BaseNode *baseNode = baseNodeStack.top(); + baseNodeStack.pop(); + if (baseNode->GetOpCode() == OP_iassign) { + IassignNode *iassign = dynamic_cast(baseNode); // iassign + if (iassign->Opnd(0)->GetOpCode() != OP_array) { + continue; + } + ArrayNode *arrayNode = dynamic_cast(iassign->Opnd(0)); + MIRArrayType *tmpArrayType = dynamic_cast(arrayNode-> + GetArrayType(GlobalTables::GetTypeTable())); + ArrayInfo arrayInfo(stmtNode, tmpArrayType, arrayNode); + + MIRType *mirType = GlobalTables::GetTypeTable().GetTypeFromTyIdx(iassign->GetTyIdx()); + MIRPtrType *pointerType = static_cast(mirType); + MIRType *pointedTy = GlobalTables::GetTypeTable().GetTypeFromTyIdx(pointerType->GetPointedTyIdx()); + arrayInfo.SetNeededSize(pointedTy->GetSize()); + + AddrofNode *addrofNode = dynamic_cast(arrayNode->GetBase()); + // CHECK_FATAL(addrofNode != nullptr, "The base of arrayNode is not of type AddrofNode"); + if (addrofNode == nullptr) { + continue; + } + + toBeChecked.push_back(arrayInfo); + } + if (baseNode->GetOpCode() == OP_iread) { + IreadNode *iread = dynamic_cast(baseNode); // iread + if (iread->Opnd(0)->GetOpCode() != OP_array) { + continue; + } + ArrayNode *arrayNode = dynamic_cast(iread->Opnd(0)); + MIRArrayType *tmpArrayType = dynamic_cast(arrayNode-> + GetArrayType(GlobalTables::GetTypeTable())); + ArrayInfo arrayInfo(stmtNode, tmpArrayType, arrayNode); + + MIRType *mirType = 
GlobalTables::GetTypeTable().GetTypeFromTyIdx(iread->GetTyIdx()); + MIRPtrType *pointerType = static_cast(mirType); + MIRType *pointedTy = GlobalTables::GetTypeTable().GetTypeFromTyIdx(pointerType->GetPointedTyIdx()); + arrayInfo.SetNeededSize(pointedTy->GetSize()); + + AddrofNode *addrofNode = dynamic_cast(arrayNode->GetBase()); + if (addrofNode == nullptr) { + continue; + } + + toBeChecked.push_back(arrayInfo); + } + for (size_t j = 0; j < baseNode->NumOpnds(); ++j) { + baseNodeStack.push(baseNode->Opnd(j)); + } + } + return toBeChecked; + } + + void BoundCheck::getBoundsCheckCond(ArrayInfo *arrayInfo, BlockNode *body, size_t dim) { + + LogInfo::MapleLogger() << "\nInstrument for " << arrayInfo->neededSize << " bytes\n"; + + MIRType *uint64 = GlobalTables::GetTypeTable().GetUInt64(); + CHECK_FATAL(arrayInfo->offset.size() == arrayInfo->dimensions.size(), + "The offset.size and array dimension.size do not match"); + MIRSymbol *offsetSym = getOrCreateSymbol(mirBuilder, uint64->GetTypeIndex(), + "ubsan_offset", kStVar, kScAuto, + mirBuilder->GetMirModule().CurFunction(), kScopeLocal); + ConstvalNode *size = mirBuilder->CreateIntConst(arrayInfo->GetElementSize() * arrayInfo->dimensions[dim], PTY_u64); + + BaseNode *offset = mirBuilder->CreateExprBinary(OP_mul, *uint64, arrayInfo->offset[dim], + mirBuilder->CreateIntConst(arrayInfo->GetElementSize(), PTY_u64)); + DassignNode *dassignNode = mirBuilder->CreateStmtDassign(offsetSym->GetStIdx(), 0, offset); + body->InsertBefore(arrayInfo->usedStmt, dassignNode); + DreadNode *dreadNode = mirBuilder->CreateDread(*offsetSym, PTY_u64); + BinaryNode *ObjSize = mirBuilder->CreateExprBinary(OP_sub, *uint64, size, dreadNode); + // Offset >= 0 + CompareNode *Cmp1 = mirBuilder->CreateExprCompare(OP_lt, *uint64, *uint64, dreadNode, + mirBuilder->CreateIntConst(0, PTY_u64)); + // Size >= Offset + CompareNode *Cmp2 = mirBuilder->CreateExprCompare(OP_lt, *uint64, *uint64, size, dreadNode); + // Size - Offset >= NeededSize + CompareNode 
*Cmp3 = mirBuilder->CreateExprCompare(OP_lt, *uint64, *uint64, ObjSize,
+                                          mirBuilder->CreateIntConst(arrayInfo->neededSize, PTY_u64));
+    arrayInfo->checks.push_back({Cmp1, Cmp2, Cmp3});
+  }
+
+  // Emits the bound check for dimension `dim` of `arrayInfo` next to the statement arrayInfo->usedStmt.
+  // The three conditions in arrayInfo->checks[dim] drive conditional branches through symbol_1/symbol_2; the
+  // code that fills the report data and calls ubsanHandler is attached to the last label created here.
+  // NOTE(review): the final statement order depends on StmtNode::InsertAfterThis/InsertBeforeThis (defined elsewhere).
+  void BoundCheck::insertBoundsCheck(ArrayInfo *arrayInfo, size_t dim) {
+    StmtNode *insertBefore = arrayInfo->usedStmt;
+    MIRType *uint64 = GlobalTables::GetTypeTable().GetUInt64();
+
+    // First check (offset >= 0). Creates: symbol_1 = checks[dim][0]; brtrue checks[dim][0] -> label; symbol_1 = checks[dim][1].
+    LabelIdx labelIdx = mirBuilder->GetMirModule().CurFunction()->GetLabelTab()->CreateLabel();
+    mirBuilder->GetMirModule().CurFunction()->GetLabelTab()->AddToStringLabelMap(labelIdx);
+
+    DassignNode *dassignNode = mirBuilder->CreateStmtDassign(*symbol_1, 0, arrayInfo->checks[dim][0]);
+    dassignNode->InsertAfterThis(*insertBefore);
+    CondGotoNode *brStmt = mirBuilder->CreateStmtCondGoto(arrayInfo->checks[dim][0], OP_brtrue, labelIdx);
+    brStmt->InsertAfterThis(*insertBefore);
+    dassignNode = mirBuilder->CreateStmtDassign(*symbol_1, 0, arrayInfo->checks[dim][1]);
+    dassignNode->InsertBeforeThis(*brStmt);
+
+    brStmt->SetOffset(labelIdx);
+    LabelNode *labelStmt = mirBuilder->GetMirModule().CurFuncCodeMemPool()->New<LabelNode>();
+    labelStmt->SetLabelIdx(labelIdx);
+    labelStmt->InsertAfterThis(*insertBefore);
+
+    CompareNode *cmpNode = mirBuilder->CreateExprCompare(OP_ne, *uint64, *uint64,
+                                                         mirBuilder->CreateDread(*symbol_1, PTY_u64),
+                                                         mirBuilder->CreateIntConst(0, PTY_u64));
+    dassignNode = mirBuilder->CreateStmtDassign(*symbol_2, 0, cmpNode);
+    dassignNode->InsertAfterThis(*insertBefore);
+
+    // Second check (size >= offset). Creates: brtrue (symbol_1 != 0) -> label; symbol_2 = checks[dim][2].
+    labelIdx = mirBuilder->GetMirModule().CurFunction()->GetLabelTab()->CreateLabel();
+    mirBuilder->GetMirModule().CurFunction()->GetLabelTab()->AddToStringLabelMap(labelIdx);
+
+    brStmt = mirBuilder->CreateStmtCondGoto(cmpNode, OP_brtrue, labelIdx);
+    brStmt->InsertBeforeThis(*dassignNode);
+    dassignNode = mirBuilder->CreateStmtDassign(*symbol_2, 0, arrayInfo->checks[dim][2]);
+    dassignNode->InsertBeforeThis(*brStmt);
+
+    brStmt->SetOffset(labelIdx);
+    labelStmt = mirBuilder->GetMirModule().CurFuncCodeMemPool()->New<LabelNode>();
+    labelStmt->SetLabelIdx(labelIdx);
+    labelStmt->InsertAfterThis(*insertBefore);
+
+    // Third check (size - offset >= neededsize). Creates: brfalse (symbol_2 != 0) -> label; presumably the jump around the report code.
+    labelIdx = mirBuilder->GetMirModule().CurFunction()->GetLabelTab()->CreateLabel();
+    mirBuilder->GetMirModule().CurFunction()->GetLabelTab()->AddToStringLabelMap(labelIdx);
+
+    cmpNode = mirBuilder->CreateExprCompare(OP_ne, *uint64, *uint64,
+                                            mirBuilder->CreateDread(*symbol_2, PTY_u64),
+                                            mirBuilder->CreateIntConst(0, PTY_u64));
+    brStmt = mirBuilder->CreateStmtCondGoto(cmpNode, OP_brfalse, labelIdx);
+    brStmt->InsertAfterThis(*insertBefore);
+
+    brStmt->SetOffset(labelIdx);
+    labelStmt = mirBuilder->GetMirModule().CurFuncCodeMemPool()->New<LabelNode>();
+    labelStmt->SetLabelIdx(labelIdx);
+    labelStmt->InsertAfterThis(*insertBefore);
+
+    // Fill sourceLoc (field 1 = file name, 2 = line, 3 = column of usedStmt). The name is the text after the last '/' of all GetSrcFileInfo() entries joined with ",\n".
+    std::string srcFileName = "";
+    if (!mirModule->GetSrcFileInfo().empty()) {
+      size_t size = mirModule->GetSrcFileInfo().size();
+      size_t i = 0;
+      for (auto infoElem : mirModule->GetSrcFileInfo()) {
+        srcFileName += GlobalTables::GetStrTable().GetStringFromStrIdx(infoElem.first);
+        if (i++ < size - 1) {
+          srcFileName += ",\n";
+        }
+      }
+    }
+
+    std::string::size_type iPos = srcFileName.find_last_of('/') + 1;  // npos + 1 wraps to 0 when there is no '/'
+    UStrIdx moduleName = GlobalTables::GetUStrTable().GetOrCreateStrIdxFromName(srcFileName.substr(iPos, srcFileName.length() - iPos));
+    ConststrNode *constNode = mirModule->CurFuncCodeMemPool()->New<ConststrNode>(PTY_a64, moduleName);
+    IassignNode *iassignNode = mirBuilder->CreateStmtIassign(*GlobalTables::GetTypeTable().GetOrCreatePointerType(*sourceLocType),
+                                                             1, mirBuilder->CreateAddrof(*sourceLoc), constNode);
+    iassignNode->InsertAfterThis(*labelStmt);
+
+    iassignNode = mirBuilder->CreateStmtIassign(*GlobalTables::GetTypeTable().GetOrCreatePointerType(*sourceLocType),
+                                                2, mirBuilder->CreateAddrof(*sourceLoc),
+                                                mirBuilder->GetConstUInt32(arrayInfo->usedStmt->GetSrcPos().LineNum()));
+    iassignNode->InsertAfterThis(*labelStmt);
+
+    iassignNode = mirBuilder->CreateStmtIassign(*GlobalTables::GetTypeTable().GetOrCreatePointerType(*sourceLocType),
+                                                3, mirBuilder->CreateAddrof(*sourceLoc),
+                                                mirBuilder->GetConstUInt32(arrayInfo->usedStmt->GetSrcPos().Column()));
+    iassignNode->InsertAfterThis(*labelStmt);
+
+    // Fill arrayType: field 1 = TypeKind, field 2 = TypeInfo, from getTypeKind() on elemType[dim] (the last element type if dim is past the end).
+    uint16_t typeKind, typeInfo;
+    if (dim < arrayInfo->elemType.size()) {
+      getTypeKind(arrayInfo->elemType[dim], &typeKind, &typeInfo);
+    } else {
+      getTypeKind(arrayInfo->elemType.back(), &typeKind, &typeInfo);
+    }
+    iassignNode = mirBuilder->CreateStmtIassign(*GlobalTables::GetTypeTable().GetOrCreatePointerType(*typeDescriptor),
+                                                1, mirBuilder->CreateAddrof(*arrayType),
+                                                mirBuilder->GetConstUInt32(typeKind));
+    iassignNode->InsertAfterThis(*labelStmt);
+
+    iassignNode = mirBuilder->CreateStmtIassign(*GlobalTables::GetTypeTable().GetOrCreatePointerType(*typeDescriptor),
+                                                2, mirBuilder->CreateAddrof(*arrayType),
+                                                mirBuilder->GetConstUInt32(typeInfo));
+    iassignNode->InsertAfterThis(*labelStmt);
+
+    MapleVector<BaseNode*> arguments(mirBuilder->GetCurrentFuncCodeMpAllocator()->Adapter());
+
+    // TypeName starts 4 bytes into TypeDescriptor (after the two uint16 fields): memcpy the name plus one extra byte, then memset the rest of the 100-byte array to 0.
+    UStrIdx typeName = GlobalTables::GetUStrTable().GetOrCreateStrIdxFromName(arrayInfo->GetArrayTypeName(dim));
+    size_t arraySize = arrayInfo->GetArrayTypeName(dim).size();
+    CHECK_FATAL(arraySize < 99, "Too long name for this arrayType.");
+    constNode = mirModule->CurFuncCodeMemPool()->New<ConststrNode>(PTY_a64, typeName);
+    arguments.push_back(mirBuilder->CreateExprBinary(OP_add, *GlobalTables::GetTypeTable().GetAddr64(),
+                                                     mirBuilder->CreateAddrof(*arrayType, PTY_u64),
+                                                     mirBuilder->CreateIntConst(4, PTY_a64)));
+    arguments.push_back(constNode);
+    arguments.push_back(mirBuilder->GetConstUInt32(uint32_t(arraySize + 1)));
+    IntrinsiccallNode *intrinsiccallNode = mirBuilder->CreateStmtIntrinsicCall(INTRN_C_memcpy, arguments);
+    intrinsiccallNode->InsertAfterThis(*labelStmt);
+
+    arguments.clear();
+    arguments.push_back(mirBuilder->CreateExprBinary(OP_add, *GlobalTables::GetTypeTable().GetAddr64(),
+                                                     mirBuilder->CreateAddrof(*arrayType, PTY_u64),
+                                                     mirBuilder->CreateIntConst(int(5 + arraySize), PTY_a64)));
+    arguments.push_back(mirBuilder->CreateIntConst(0, PTY_u32));
+    arguments.push_back(mirBuilder->GetConstUInt32(uint32_t(99 - arraySize)));
+    intrinsiccallNode = mirBuilder->CreateStmtIntrinsicCall(INTRN_C_memset, arguments);
+    intrinsiccallNode->InsertAfterThis(*labelStmt);
+
+    // Fill indexType the same way, using the primitive type of the index expression arrayInfo->offset[dim].
+    getTypeKind(GlobalTables::GetTypeTable().GetPrimType(arrayInfo->offset[dim]->GetPrimType()),
+                &typeKind, &typeInfo);
+
+    iassignNode = mirBuilder->CreateStmtIassign(*GlobalTables::GetTypeTable().GetOrCreatePointerType(*typeDescriptor),
+                                                1, mirBuilder->CreateAddrof(*indexType),
+                                                mirBuilder->GetConstUInt32(typeKind));
+    iassignNode->InsertAfterThis(*labelStmt);
+
+    iassignNode = mirBuilder->CreateStmtIassign(*GlobalTables::GetTypeTable().GetOrCreatePointerType(*typeDescriptor),
+                                                2, mirBuilder->CreateAddrof(*indexType),
+                                                mirBuilder->GetConstUInt32(typeInfo));
+    iassignNode->InsertAfterThis(*labelStmt);
+
+    typeName = GlobalTables::GetUStrTable().GetOrCreateStrIdxFromName(GetPrimTypeName(arrayInfo->offset[dim]->GetPrimType()));
+    constNode = mirModule->CurFuncCodeMemPool()->New<ConststrNode>(PTY_a64, typeName);
+
+    arraySize = strlen(GetPrimTypeName(arrayInfo->offset[dim]->GetPrimType()));
+    CHECK_FATAL(arraySize < 99, "Too long name for PrimTypeName.");
+    arguments.clear();
+    arguments.push_back(mirBuilder->CreateExprBinary(OP_add, *GlobalTables::GetTypeTable().GetAddr64(),
+                                                     mirBuilder->CreateAddrof(*indexType, PTY_u64),
+                                                     mirBuilder->CreateIntConst(4, PTY_a64)));
+    arguments.push_back(constNode);
+    arguments.push_back(mirBuilder->GetConstUInt32(uint32_t(arraySize + 1)));
+    intrinsiccallNode = mirBuilder->CreateStmtIntrinsicCall(INTRN_C_memcpy, arguments);
+    intrinsiccallNode->InsertAfterThis(*labelStmt);
+
+    arguments.clear();
+    arguments.push_back(mirBuilder->CreateExprBinary(OP_add, *GlobalTables::GetTypeTable().GetAddr64(),
+                                                     mirBuilder->CreateAddrof(*indexType, PTY_u64),
+                                                     mirBuilder->CreateIntConst(int(5 + arraySize), PTY_a64)));
+    arguments.push_back(mirBuilder->CreateIntConst(0, PTY_u32));
+    arguments.push_back(mirBuilder->GetConstUInt32(uint32_t(99 - arraySize)));
+    intrinsiccallNode = mirBuilder->CreateStmtIntrinsicCall(INTRN_C_memset, arguments);
+    intrinsiccallNode->InsertAfterThis(*labelStmt);
+
+    // Fill outofBound (field ids 1, 5, 6 -- presumably Loc, ArrayType, IndexType) and call the handler with its address and the index.
+    iassignNode = mirBuilder->CreateStmtIassign(*GlobalTables::GetTypeTable().GetOrCreatePointerType(*outofBoundsData),
+                                                1, mirBuilder->CreateAddrof(*outofBound),
+                                                mirBuilder->CreateDread(*sourceLoc, PTY_agg));
+    iassignNode->InsertAfterThis(*labelStmt);
+
+    iassignNode = mirBuilder->CreateStmtIassign(*GlobalTables::GetTypeTable().GetOrCreatePointerType(*outofBoundsData),
+                                                5, mirBuilder->CreateAddrof(*outofBound),
+                                                mirBuilder->CreateAddrof(*arrayType));
+    iassignNode->InsertAfterThis(*labelStmt);
+
+    iassignNode = mirBuilder->CreateStmtIassign(*GlobalTables::GetTypeTable().GetOrCreatePointerType(*outofBoundsData),
+                                                6, mirBuilder->CreateAddrof(*outofBound),
+                                                mirBuilder->CreateAddrof(*indexType));
+    iassignNode->InsertAfterThis(*labelStmt);
+
+    MapleVector<BaseNode*> args(mirBuilder->GetCurrentFuncCodeMpAllocator()->Adapter());
+    args.emplace_back(mirBuilder->CreateAddrof(*outofBound, PTY_a64));
+    args.emplace_back(arrayInfo->offset[dim]);
+    CallNode *callNode = mirBuilder->CreateStmtCall(ubsanHandler->GetPuidx(), args);
+    callNode->InsertAfterThis(*labelStmt);
+  }
+
+  // Creates the MIR struct types SourceLocation, TypeDescriptor and OutOfBoundsData, looks up or inserts
+  // __ubsan_handle_out_of_bounds, and creates the function-local symbols that insertBoundsCheck reads and writes.
+  void BoundCheck::initializeCallbacks() {
+    FieldVector fieldVector;
+    FieldVector parentFields;
+
+    MIRPtrType *int8Ptr = static_cast<MIRPtrType *>(GlobalTables::GetTypeTable().GetOrCreatePointerType(
+        GlobalTables::GetTypeTable().GetInt8()->GetTypeIndex()));
+
+    GlobalTables::GetTypeTable().PushIntoFieldVector(
+        fieldVector, "Filename", *int8Ptr);
+    GlobalTables::GetTypeTable().PushIntoFieldVector(
+        fieldVector, "Line", *GlobalTables::GetTypeTable().GetUInt32());
+    GlobalTables::GetTypeTable().PushIntoFieldVector(
+        fieldVector, "Column", *GlobalTables::GetTypeTable().GetUInt32());
+    sourceLocType = static_cast<MIRStructType *>(
+        GlobalTables::GetTypeTable().GetOrCreateStructType(
+            "SourceLocation", fieldVector, parentFields, mirBuilder->GetMirModule()));
+    fieldVector.clear();
+
+    MIRArrayType *charArray = static_cast<MIRArrayType *>(
+        GlobalTables::GetTypeTable().GetOrCreateArrayType(*GlobalTables::GetTypeTable().GetInt8(), 100));
+    GlobalTables::GetTypeTable().PushIntoFieldVector(
+        fieldVector, "TypeKind", *GlobalTables::GetTypeTable().GetUInt16());
+    GlobalTables::GetTypeTable().PushIntoFieldVector(
+        fieldVector, "TypeInfo", *GlobalTables::GetTypeTable().GetUInt16());
+    GlobalTables::GetTypeTable().PushIntoFieldVector(
+        fieldVector, "TypeName", *charArray);
+    typeDescriptor = static_cast<MIRStructType *>(
+        GlobalTables::GetTypeTable().GetOrCreateStructType(
+            "TypeDescriptor", fieldVector, parentFields, mirBuilder->GetMirModule()));
+    fieldVector.clear();
+
+    GlobalTables::GetTypeTable().PushIntoFieldVector(
+        fieldVector, "Loc", *sourceLocType);
+    GlobalTables::GetTypeTable().PushIntoFieldVector(
+        fieldVector, "ArrayType", *GlobalTables::GetTypeTable().GetOrCreatePointerType(*typeDescriptor));
+    GlobalTables::GetTypeTable().PushIntoFieldVector(
+        fieldVector, "IndexType", *GlobalTables::GetTypeTable().GetOrCreatePointerType(*typeDescriptor));
+
+    outofBoundsData = static_cast<MIRStructType *>(
+        GlobalTables::GetTypeTable().GetOrCreateStructType(
+            "OutOfBoundsData", fieldVector, parentFields, mirBuilder->GetMirModule()));
+
+    ubsanHandler = getOrInsertFunction(mirBuilder, "__ubsan_handle_out_of_bounds",
+                                       GlobalTables::GetTypeTable().GetVoid(),
+                                       {GlobalTables::GetTypeTable().GetInt8(),  // NOTE(review): the call site passes an a64 address here -- confirm Int8 is intended
+                                        GlobalTables::GetTypeTable().GetUInt64()});
+
+    symbol_1 = getOrCreateSymbol(mirBuilder, GlobalTables::GetTypeTable().GetUInt64()->GetTypeIndex(),
+                                 "ubsan_cmp_1", kStVar, kScAuto,
+                                 mirBuilder->GetMirModule().CurFunction(), kScopeLocal);
+    symbol_2 = getOrCreateSymbol(mirBuilder, GlobalTables::GetTypeTable().GetUInt64()->GetTypeIndex(),
+                                 "ubsan_cmp_2", kStVar, kScAuto,
+                                 mirBuilder->GetMirModule().CurFunction(), kScopeLocal);
+
+    outofBound = getOrCreateSymbol(mirBuilder, outofBoundsData->GetTypeIndex(),
+                                   "ubsan_outOfBound", kStVar, kScAuto, func->GetMirFunc(), kScopeLocal);
+
+    sourceLoc = getOrCreateSymbol(mirBuilder, sourceLocType->GetTypeIndex(),  // SourceLocation: insertBoundsCheck stores to it through SourceLocation*
+                                  "ubsan_sourceLoc", kStVar, kScAuto, func->GetMirFunc(), kScopeLocal);
+
+    arrayType = getOrCreateSymbol(mirBuilder, typeDescriptor->GetTypeIndex(),
+                                  "ubsan_arrayType", kStVar, kScAuto, func->GetMirFunc(), kScopeLocal);
+
+    indexType = getOrCreateSymbol(mirBuilder, typeDescriptor->GetTypeIndex(),
+                                  "ubsan_indexType", kStVar, kScAuto, func->GetMirFunc(), kScopeLocal);
+  }
+
+  // Walks every statement of the function body, collects the array accesses returned by getArrayInfo and
+  // builds + inserts a bound check for each entry of their `offset` list. Always returns true.
+  bool BoundCheck::addBoundsChecking() {
+    for (auto &stmt : func->GetMirFunc()->GetBody()->GetStmtNodes()) {
+      std::vector<ArrayInfo> toBeChecked = getArrayInfo(&stmt);
+      if (toBeChecked.empty()) {
+        continue;
+      }
+      for (ArrayInfo &arrayInfo : toBeChecked) {  // by reference: toBeChecked is a local, no per-element copy needed
+        for (size_t i = 0; i < arrayInfo.offset.size(); i++) {
+          getBoundsCheckCond(&arrayInfo, func->GetMirFunc()->GetBody(), i);
+          insertBoundsCheck(&arrayInfo, i);
+        }
+      }
+    }
+    return true;
+  }
+} // namespace maple
+
+#endif
\ No newline at end of file
diff --git a/src/mapleall/maple_san/src/ubsan_phases.cpp b/src/mapleall/maple_san/src/ubsan_phases.cpp
new file mode 100644
index 0000000000000000000000000000000000000000..f51290c849c2b028d59277d0701544c50e5708b6
--- /dev/null
+++ b/src/mapleall/maple_san/src/ubsan_phases.cpp
@@ -0,0 +1,19 @@
+#ifdef ENABLE_MAPLE_SAN
+
+#include "ubsan_phases.h"
+#include "ubsan_bounds.h"
+
+// MEDoUbsanBound: phase wrapper that runs BoundCheck::addBoundsChecking on the MeFunction it is given.
+namespace maple {
+  void MEDoUbsanBound::GetAnalysisDependence(maple::AnalysisDep &aDep) const {
+    aDep.SetPreservedAll();  // declares every analysis result as preserved
+  }
+  // Builds a BoundCheck for `func` and runs it; the result of addBoundsChecking is ignored and true is returned.
+  bool MEDoUbsanBound::PhaseRun(MeFunction &func) {
+    BoundCheck boundCheck(&func);
+    boundCheck.addBoundsChecking();
+    return true;
+  }
+}
+
+#endif
\ No newline
at end of file
diff --git a/test/c_test/sanitizer/juliet_test_suite b/test/c_test/sanitizer/juliet_test_suite
new file mode 160000
index 0000000000000000000000000000000000000000..66dbbd4184275a77f4e33b667bfc49d44a9ac6c6
--- /dev/null
+++ b/test/c_test/sanitizer/juliet_test_suite
@@ -0,0 +1 @@
+Subproject commit 66dbbd4184275a77f4e33b667bfc49d44a9ac6c6
diff --git a/test/c_test/sanitizer/simple/.gitignore b/test/c_test/sanitizer/simple/.gitignore
new file mode 100644
index 0000000000000000000000000000000000000000..6f249308ef81c157c11419cce7da3e62fa2f22f1
--- /dev/null
+++ b/test/c_test/sanitizer/simple/.gitignore
@@ -0,0 +1,6 @@
+*.txt
+*.s
+*.mpl
+*.ast
+*.o
+
diff --git a/test/c_test/sanitizer/simple/run_asan.sh b/test/c_test/sanitizer/simple/run_asan.sh
new file mode 100755
index 0000000000000000000000000000000000000000..f4d0202c712c3321b219bdfa812a31c8f8c2fe4c
--- /dev/null
+++ b/test/c_test/sanitizer/simple/run_asan.sh
@@ -0,0 +1,31 @@
+#!/bin/bash
+# Usage: ./run_asan.sh <file.c> -- builds the file with arkcc_asan.py, runs it under qemu-aarch64, then removes temp files.
+FILENAME=$1
+arkcc=$MAPLE_ROOT/scripts/arkcc_asan.py
+
+run_cmd(){
+  cmd=$1
+  echo $cmd
+  eval $cmd
+  if [ $? -ne 0 ]; then
+    echo "failed"
+    exit 1  # a bare `exit` would return the status of the echo above, i.e. 0
+  fi
+}
+
+# compile to object and insert asan logic
+cmd="$arkcc $FILENAME -o $FILENAME.out"
+run_cmd "$cmd"
+
+# cmd="$GCC_LINARO_PATH/bin/aarch64-linux-gnu-gcc $FILENAME.s -o $FILENAME.out -lasan -ldl -lpthread -lm -lrt"
+# run_cmd "$cmd"
+
+# use qemu-aarch64 to test, note that the runtime library of asan could be replaced with clang's implementation with RBTree
+cmd="ASAN_OPTIONS=detect_leaks=0 qemu-aarch64 -L /usr/aarch64-linux-gnu -E LD_LIBRARY_PATH=$GCC_LINARO_PATH/aarch64-linux-gnu/lib64 $FILENAME.out"
+echo $cmd
+eval $cmd
+
+# clean tmp files
+rm $FILENAME.ast $FILENAME.s $FILENAME.mpl $FILENAME.me.mpl $FILENAME.o comb.me.mpl comb.san.mpl
+rm $FILENAME.out
+
diff --git a/test/c_test/sanitizer/simple/test1.c b/test/c_test/sanitizer/simple/test1.c
new file mode 100644
index 0000000000000000000000000000000000000000..e14cc14e20f28f92536ba6ec7c572be45045b554
--- /dev/null
+++ b/test/c_test/sanitizer/simple/test1.c
@@ -0,0 +1,11 @@
+// ./run_asan.sh test1.c
+#include <stdio.h>
+#include <stdlib.h>
+
+int main(int argc, char **argv) {
+  int *array = (int *) malloc(100 * sizeof(int));
+  free(array);
+  int ret_res = array[argc];  // reads the block after free() -- the access this test exercises
+  return ret_res;
+  // return array[argc]; // BOOM
+}
diff --git a/test/c_test/sanitizer/simple/test2.c b/test/c_test/sanitizer/simple/test2.c
new file mode 100644
index 0000000000000000000000000000000000000000..212a927424e4b8ba22df515b224036b3293b0e35
--- /dev/null
+++ b/test/c_test/sanitizer/simple/test2.c
@@ -0,0 +1,9 @@
+// ./run_asan.sh test2.c
+#include <stdio.h>
+
+int main() {
+  char d[20];
+  d[21] = 1;  // write past the end of the 20-byte stack array
+  printf("%d\n", d[11]);
+  return 0;
+}
diff --git a/test/c_test/sanitizer/simple/test3.c b/test/c_test/sanitizer/simple/test3.c
new file mode 100644
index 0000000000000000000000000000000000000000..0377ee3bc5251707468b05c3d08211634f216242
--- /dev/null
+++ b/test/c_test/sanitizer/simple/test3.c
@@ -0,0 +1,13 @@
+// ./run_asan.sh test3.c
+#include <alloca.h>
+
+void test(int a) {
+  if (a) {
+    int *d = (int *)alloca(100 * sizeof(int));
+    d[101] = 1;  // write past the end of the 100-int alloca() buffer
+  }
+}
+int main() {
+  test(1);
+  return 0;
+}
diff --git a/test/c_test/sanitizer/simple/test4.c b/test/c_test/sanitizer/simple/test4.c
new file mode 100644
index 0000000000000000000000000000000000000000..c54e7a55c027e93d96cc91f5913037a122a1a0a3
--- /dev/null
+++ b/test/c_test/sanitizer/simple/test4.c
@@ -0,0 +1,11 @@
+// ./run_asan.sh test4.c
+#include <stdlib.h>
+
+void test(int m) {
+  int *d = (int *)malloc(m * sizeof(int));
+  d[m] = 1;  // one element past the end of the m-int heap block
+}
+int main() {
+  test(100);
+  return 0;
+}
diff --git a/test/c_test/sanitizer/simple/test5.c b/test/c_test/sanitizer/simple/test5.c
new file mode 100644
index 0000000000000000000000000000000000000000..77a2c0c0cffa0f0c33b845ca8f4c173982bee23d
--- /dev/null
+++ b/test/c_test/sanitizer/simple/test5.c
@@ -0,0 +1,5 @@
+// ./run_asan.sh test5.c
+int array[100] = {0};
+int main (int argc, char** argv) {
+  array[100] = 1;  // one element past the end of the 100-int global array
+}
diff --git a/test/c_test/sanitizer/simple/test6.c b/test/c_test/sanitizer/simple/test6.c
new file mode 100644
index 0000000000000000000000000000000000000000..d3b6bced0502bb36a8f1cae1fbc867c6fd2767ee
--- /dev/null
+++ b/test/c_test/sanitizer/simple/test6.c
@@ -0,0 +1,7 @@
+// ./run_ubsan.sh test6.c
+int main() {
+  int mas[11][12][13];
+  // mas[11][9] = 20;
+  // mas[9][10][13] = 100;
+  mas[9][12][1] = 100;  // middle index 12 is out of range for a dimension of size 12
+}
diff --git a/test/c_test/sanitizer/simple/test7.c b/test/c_test/sanitizer/simple/test7.c
new file mode 100644
index 0000000000000000000000000000000000000000..e694203d60fc3e5af15bb2776d4f7f9669cebf07
--- /dev/null
+++ b/test/c_test/sanitizer/simple/test7.c
@@ -0,0 +1,9 @@
+// ./run_ubsan.sh test7.c
+
+#include <string.h>
+
+int main() {
+  char src[] = "hello";
+  char buf[3];
+  strcpy(buf, src);  // copies 6 bytes ("hello" plus NUL) into a 3-byte buffer
+}