diff --git a/build.sh b/build.sh index d71165f7c988ded447ce1f26f063d9f65a37ac57..9be0fd996c5836280dae329fe3bee3b8b7537960 100755 --- a/build.sh +++ b/build.sh @@ -9,6 +9,7 @@ buildtype=RelWithDebInfo backends="ARM;AArch64;X86" build_for_openeuler="0" enabled_projects="clang;lld;compiler-rt;openmp;clang-tools-extra" +enable_bisheng_autotuner="1" embedded_toolchain="0" split_dwarf=on use_ccache="0" @@ -47,6 +48,7 @@ Usage: $0 [options] Build the compiler under $build_prefix, then install under $install_prefix. Options: + -a Enable BiSheng Autotuner. -b type Specify CMake build type (default: $buildtype). -c Use ccache (default: $use_ccache). -e Build for embedded cross tool chain. @@ -67,8 +69,11 @@ EOF # Process command-line options. Remember the options for passing to the # containerized build script. -while getopts :b:ceEhiI:j:orstvfX: optchr; do +while getopts :ab:ceEhiI:j:orstvfX: optchr; do case "$optchr" in + a) + enable_bisheng_autotuner="1" + ;; b) buildtype="$OPTARG" case "${buildtype,,}" in @@ -195,6 +200,11 @@ if [ $build_for_openeuler == "1" ]; then CMAKE_OPTIONS="$CMAKE_OPTIONS -DBUILD_FOR_OPENEULER=ON" fi +if [ $enable_bisheng_autotuner == "1" ]; then + echo "Build with BiSheng Autotuner" + CMAKE_OPTIONS="$CMAKE_OPTIONS -DLLVM_ENABLE_AUTOTUNER=ON" +fi + # Build and install if [ $clean -eq 1 -a -e "$install_prefix" ]; then rm -rf "$install_prefix" diff --git a/clang/include/clang/Basic/CMakeLists.txt b/clang/include/clang/Basic/CMakeLists.txt index f010e04f62cd8c2be78dd6f3119f20ee65cd9f6c..e449d279059777fd8577b3bd865c92c23eeaf9a2 100644 --- a/clang/include/clang/Basic/CMakeLists.txt +++ b/clang/include/clang/Basic/CMakeLists.txt @@ -1,6 +1,12 @@ +set(CLANG_BASIC_OPTIONS) +if(LLVM_ENABLE_AUTOTUNER) + list(APPEND CLANG_BASIC_OPTIONS "-DENABLE_AUTOTUNER") +endif() + macro(clang_diag_gen component) clang_tablegen(Diagnostic${component}Kinds.inc -gen-clang-diags-defs -clang-component=${component} + ${CLANG_BASIC_OPTIONS} SOURCE Diagnostic.td TARGET 
ClangDiagnostic${component}) endmacro(clang_diag_gen) @@ -18,20 +24,24 @@ clang_diag_gen(Refactoring) clang_diag_gen(Sema) clang_diag_gen(Serialization) clang_tablegen(DiagnosticGroups.inc -gen-clang-diag-groups + ${CLANG_BASIC_OPTIONS} SOURCE Diagnostic.td TARGET ClangDiagnosticGroups) clang_tablegen(DiagnosticIndexName.inc -gen-clang-diags-index-name + ${CLANG_BASIC_OPTIONS} SOURCE Diagnostic.td TARGET ClangDiagnosticIndexName) clang_tablegen(AttrList.inc -gen-clang-attr-list -I ${CMAKE_CURRENT_SOURCE_DIR}/../../ + ${CLANG_BASIC_OPTIONS} SOURCE Attr.td TARGET ClangAttrList) clang_tablegen(AttrSubMatchRulesList.inc -gen-clang-attr-subject-match-rule-list -I ${CMAKE_CURRENT_SOURCE_DIR}/../../ + ${CLANG_BASIC_OPTIONS} SOURCE Attr.td TARGET ClangAttrSubjectMatchRuleList) @@ -43,6 +53,7 @@ clang_tablegen(AttrTokenKinds.inc -gen-clang-attr-token-kinds clang_tablegen(AttrHasAttributeImpl.inc -gen-clang-attr-has-attribute-impl -I ${CMAKE_CURRENT_SOURCE_DIR}/../../ + ${CLANG_BASIC_OPTIONS} SOURCE Attr.td TARGET ClangAttrHasAttributeImpl ) @@ -67,15 +78,19 @@ clang_tablegen(arm_mve_builtin_aliases.inc -gen-arm-mve-builtin-aliases SOURCE arm_mve.td TARGET ClangARMMveBuiltinAliases) clang_tablegen(arm_sve_builtins.inc -gen-arm-sve-builtins + ${CLANG_BASIC_OPTIONS} SOURCE arm_sve.td TARGET ClangARMSveBuiltins) clang_tablegen(arm_sve_builtin_cg.inc -gen-arm-sve-builtin-codegen + ${CLANG_BASIC_OPTIONS} SOURCE arm_sve.td TARGET ClangARMSveBuiltinCG) clang_tablegen(arm_sve_typeflags.inc -gen-arm-sve-typeflags + ${CLANG_BASIC_OPTIONS} SOURCE arm_sve.td TARGET ClangARMSveTypeFlags) clang_tablegen(arm_sve_sema_rangechecks.inc -gen-arm-sve-sema-rangechecks + ${CLANG_BASIC_OPTIONS} SOURCE arm_sve.td TARGET ClangARMSveSemaRangeChecks) clang_tablegen(arm_sme_builtins.inc -gen-arm-sme-builtins diff --git a/clang/include/clang/Basic/DiagnosticDriverKinds.td b/clang/include/clang/Basic/DiagnosticDriverKinds.td index 
37586242953fee61c155026818a67f48e1cad142..6b68bc458b939a1806f918f07ccb4df7b9c6fab3 100644 --- a/clang/include/clang/Basic/DiagnosticDriverKinds.td +++ b/clang/include/clang/Basic/DiagnosticDriverKinds.td @@ -248,6 +248,15 @@ def err_drv_cannot_read_config_file : Error< "cannot read configuration file '%0': %1">; def err_drv_arg_requires_bitcode_input: Error< "option '%0' requires input to be LLVM bitcode">; +#ifdef ENABLE_AUTOTUNER +def err_drv_autotune_generic : Error<"%0">; +def err_drv_autotune_disabled_O0 : Error< + "-fautotune/-fautotune-generate should not be enabled at -O0">; +def err_drv_autotune_incorrect_env : Error< + "incorrect argument '%0' in environment variable used">; +def err_drv_autotune_no_filter_types : Error< + "no types added for filtering with %0">; +#endif def err_target_unsupported_arch : Error<"the target architecture '%0' is not supported by the target '%1'">; diff --git a/clang/include/clang/Basic/DiagnosticFrontendKinds.td b/clang/include/clang/Basic/DiagnosticFrontendKinds.td index 9ed9a88fa3d62d226826da7c9ae1b92c2d032f9f..11022962ae9e3893c4c2f05101d35a8aca0a3e58 100644 --- a/clang/include/clang/Basic/DiagnosticFrontendKinds.td +++ b/clang/include/clang/Basic/DiagnosticFrontendKinds.td @@ -346,4 +346,12 @@ def warn_profile_data_misexpect : Warning< def err_extract_api_ignores_file_not_found : Error<"file '%0' specified by '--extract-api-ignores=' not found">, DefaultFatal; +#ifdef ENABLE_AUTOTUNER +let CategoryName = "AutoTuning Issues" in { +def err_auto_tuning_error_reading : Error<"'%0'">; +def err_auto_tuning_error_dumping : Error<"'%0'">; +def err_unable_to_create_pass : Error< + "cannot create pass '%0' from AutoTuning input file">; +} // end of autoTuning issue category +#endif } diff --git a/clang/include/clang/Driver/CMakeLists.txt b/clang/include/clang/Driver/CMakeLists.txt index 8c0af1528a96c7faf5e60c56b1d366dffa50df4f..56fff6a2504e6e4af5adcff73aed16f9e495f6c9 100644 --- a/clang/include/clang/Driver/CMakeLists.txt +++ 
b/clang/include/clang/Driver/CMakeLists.txt @@ -8,7 +8,11 @@ endif() if (LLVM_ENABLE_CLASSIC_FLANG) list(APPEND CLANG_DRIVER_OPTIONS -DENABLE_CLASSIC_FLANG ) endif() - + +if (LLVM_ENABLE_AUTOTUNER) + list(APPEND CLANG_DRIVER_OPTIONS "-DENABLE_AUTOTUNER" ) +endif() + tablegen(LLVM Options.inc ${CLANG_DRIVER_OPTIONS} -gen-opt-parser-defs ) add_public_tablegen_target(ClangDriverOptions) diff --git a/clang/include/clang/Driver/Driver.h b/clang/include/clang/Driver/Driver.h index e3e98bad9912703058c496abe15793e758124b97..dcecb473b516f1f151966f46d928c27ecd721b1c 100644 --- a/clang/include/clang/Driver/Driver.h +++ b/clang/include/clang/Driver/Driver.h @@ -72,6 +72,14 @@ enum ModuleHeaderMode { HeaderMode_System }; +#if defined(ENABLE_AUTOTUNER) +enum AutoTuneKind { + AutoTuneNone, + AutoTuneGenerate, + AutoTuneNext, +}; +#endif + /// Driver - Encapsulate logic for constructing compilation processes /// from a set of gcc-driver-like command line arguments. class Driver { @@ -119,6 +127,11 @@ class Driver { /// LTO mode selected via -f(no-offload-)?lto(=.*)? options. LTOKind OffloadLTOMode; +#if defined(ENABLE_AUTOTUNER) + /// AutoTune mode selected via -fautotune or -fautotune-generate option + AutoTuneKind AutoTuneMode; +#endif + public: enum OpenMPRuntimeKind { /// An unknown OpenMP runtime. We can't generate effective OpenMP code @@ -191,6 +204,21 @@ public: /// Information about the host which can be overridden by the user. std::string HostBits, HostMachine, HostSystem, HostRelease; +#if defined(ENABLE_AUTOTUNER) + /// The path to the llvm-autotune data directory. + std::string AutoTuneDirDataPath; + /// Path for project base directory. Base directory is removed from absolute + /// path and relative path is used as (coarse-grain) code region name. This + /// allow to port a config file from one machine/location to another. 
+ std::string AutoTuneProjectDir; + + /// Whether to prepare the compiler to produce additional metadata + /// that will be consumed by Autotuner's ML model + bool IsMLTuningEnabled; + + std::string AutoTuneOptions; +#endif + /// The file to log CC_PRINT_PROC_STAT_FILE output to, if enabled. std::string CCPrintStatReportFilename; @@ -705,6 +733,14 @@ public: return IsOffload ? OffloadLTOMode : LTOMode; } +#if defined(ENABLE_AUTOTUNER) + /// Returns true if we are performing any kind of AutoTune. + bool isUsingAutoTune() const { return AutoTuneMode != AutoTuneNone; } + + /// Get the specific kind of AutoTune being performed. + AutoTuneKind getAutoTuneMode() const { return AutoTuneMode; } +#endif + private: /// Tries to load options from configuration files. diff --git a/clang/include/clang/Driver/Options.td b/clang/include/clang/Driver/Options.td index c5cc66c58f250daf792d5c05eacda0b715b16e90..71d6ed66ab9680f8d36a8877a82e5dd1ff8b3f88 100644 --- a/clang/include/clang/Driver/Options.td +++ b/clang/include/clang/Driver/Options.td @@ -1795,6 +1795,19 @@ def fmemory_profile_use_EQ : Joined<["-"], "fmemory-profile-use=">, Group, Flags<[CC1Option, CoreOption]>, MetaVarName<"">, HelpText<"Use memory profile for profile-guided memory optimization">, MarshallingInfoString>; +#ifdef ENABLE_AUTOTUNER +// Auto-tuning flags. 
+def fautotune : Flag<["-"], "fautotune">, Group, + HelpText<"Auto-tune with the compiler configuration under 'autotune_datadir' (overridden by AUTOTUNE_DATADIR env var)">; +def fautotune_EQ : Joined<["-"], "fautotune=">, Group, + HelpText<"Auto-tune with the compiler configuration of the specified id under 'autotune_datadir' (overridden by AUTOTUNE_DATADIR env var)">; +def fautotune_generate : Flag<["-"], "fautotune-generate">, Group, + HelpText<"Generate initial compiler configuration for Function/Loop code regions under 'autotune_datadir' (overridden by AUTOTUNE_DATADIR env var)">; +def fautotune_generate_EQ : CommaJoined<["-"], "fautotune-generate=">, Group, + HelpText<"Generate initial compiler configuration for the given comma-separated list of code regions under 'autotune_datadir' (overridden by AUTOTUNE_DATADIR env var)">, Values<"Other,LLVMParam,CallSite,Function,Loop,MachineBasicBlock,Switch,ProgramParam">; // Keep this list in sync with processOpportunitiesOptions() in CommonArgs.cpp. +def fautotune_rank : Flag<["-"], "fautotune-rank">, Group, + HelpText<"Generate files necessary for ML-guided ranking">; +#endif #ifdef BUILD_FOR_OPENEULER def fgcc_compatible : Flag<["-"], "fgcc-compatible">, Group, diff --git a/clang/lib/CodeGen/BackendUtil.cpp b/clang/lib/CodeGen/BackendUtil.cpp index f962d60870d19a90321668a9238b605ccec71607..cef5e0d16ba7aa621a966bd6e24be7e918c0f7f2 100644 --- a/clang/lib/CodeGen/BackendUtil.cpp +++ b/clang/lib/CodeGen/BackendUtil.cpp @@ -88,6 +88,10 @@ using namespace llvm; llvm::PassPluginLibraryInfo get##Ext##PluginInfo(); #include "llvm/Support/Extension.def" +#if defined(ENABLE_AUTOTUNER) +#include "llvm/Analysis/AutotuningDump.h" +#endif + namespace llvm { extern cl::opt DebugInfoCorrelate; @@ -1021,6 +1025,27 @@ void EmitAssemblyHelper::RunOptimizationPipeline( }); } +#if defined(ENABLE_AUTOTUNER) + bool Changed = false; + // If autotuning is enabled (for applying configuration), use AutoTuner + // generated pass ordering instead of passes in compilation pipeline. Passes + // before and after the compilation pipeline will be intact. 
+ if (autotuning::Engine.isEnabled()) { + std::vector PassesList; + Changed = autotuning::Engine.lookUpGlobalParams("OptPass", PassesList); + if (Changed && PassesList.size()) { + std::string PassPipeline = ""; + for (auto PassName : PassesList) + PassPipeline.append(PassName + ","); + PassPipeline.pop_back(); + + if (auto Err = PB.parsePassPipeline(MPM, PassPipeline)) + errs() << "AutoTuner: cannot add pass:" << toString(std::move(Err)) + << "\n"; + } + } + if (!Changed) { +#endif if (IsThinLTO || (IsLTO && CodeGenOpts.UnifiedLTO)) { MPM = PB.buildThinLTOPreLinkDefaultPipeline(Level); } else if (IsLTO) { @@ -1028,6 +1053,9 @@ void EmitAssemblyHelper::RunOptimizationPipeline( } else { MPM = PB.buildPerModuleDefaultPipeline(Level); } +#if defined(ENABLE_AUTOTUNER) + } +#endif } // Add a verifier pass if requested. We don't have to do this if the action @@ -1078,6 +1106,12 @@ void EmitAssemblyHelper::RunOptimizationPipeline( } } +#if defined(ENABLE_AUTOTUNER) + // Please ensure this pass is added after all optimization passes. + if (autotuning::Engine.isEnabled()) + MPM.addPass(RequireAnalysisPass()); +#endif + // Now that we have all of the passes ready, run them. { PrettyStackTraceString CrashInfo("Optimizer"); @@ -1125,6 +1159,22 @@ void EmitAssemblyHelper::RunCodegenPipeline( void EmitAssemblyHelper::EmitAssembly(BackendAction Action, std::unique_ptr OS) { TimeRegion Region(CodeGenOpts.TimePasses ? &CodeGenerationTime : nullptr); + +#if defined(ENABLE_AUTOTUNER) + // AUTO-TUNING - auto-tuning initialization for this module. + // Initialize it before parsing command-line options because we want to + // overwrite the llvm options using the config file. 
+ if (Error E = autotuning::Engine.init(TheModule->getModuleIdentifier())) { + Diags.Report(diag::err_auto_tuning_error_reading) << toString(std::move(E)); + return; + } + if (autotuning::Engine.isEnabled() && autotuning::Engine.isParseInput() && + (autotuning::Engine.LLVMParams.size() || + autotuning::Engine.ProgramParams.size())) + llvm::cl::ParseAutoTunerOptions(autotuning::Engine.LLVMParams, + autotuning::Engine.ProgramParams); +#endif + setCommandLineOpts(CodeGenOpts); bool RequiresCodeGen = actionRequiresCodeGen(Action); @@ -1142,6 +1192,14 @@ void EmitAssemblyHelper::EmitAssembly(BackendAction Action, RunOptimizationPipeline(Action, OS, ThinLinkOS); RunCodegenPipeline(Action, OS, DwoOS); +#if defined(ENABLE_AUTOTUNER) + // AUTO-TUNING - auto-tuning finalization for this module + if (Error E = autotuning::Engine.finalize()) { + Diags.Report(diag::err_auto_tuning_error_dumping) << toString(std::move(E)); + return; + } +#endif + if (ThinLinkOS) ThinLinkOS->keep(); if (DwoOS) diff --git a/clang/lib/Driver/Driver.cpp b/clang/lib/Driver/Driver.cpp index 819d7703b2e76c5ca389dca5dc302741880abd5c..bd9db7714f9588eac4d4dea8b15ee5179415debc 100644 --- a/clang/lib/Driver/Driver.cpp +++ b/clang/lib/Driver/Driver.cpp @@ -104,6 +104,14 @@ #if LLVM_ON_UNIX #include // getpid #endif +#if defined(ENABLE_AUTOTUNER) +// Constant definition for environment variable to enable AutoTuner and set +// the mode to generate opportunities or apply configurations. +const std::string AutoTuneModeStr = "AUTOTUNE_MODE"; +// Constant definition for environment variable to specify the project base +// directory. 
+const std::string AutoTunePrjDirStr = "AUTOTUNE_PROJECT_DIR"; +#endif using namespace clang::driver; using namespace clang; @@ -200,6 +208,9 @@ Driver::Driver(StringRef ClangExecutable, StringRef TargetTriple, SaveTemps(SaveTempsNone), BitcodeEmbed(EmbedNone), Offload(OffloadHostDevice), CXX20HeaderType(HeaderMode_None), ModulesModeCXX20(false), LTOMode(LTOK_None), +#if defined(ENABLE_AUTOTUNER) + AutoTuneMode(AutoTuneNone), +#endif ClangExecutable(ClangExecutable), SysRoot(DEFAULT_SYSROOT), DriverTitle(Title), CCCPrintBindings(false), CCPrintOptions(false), CCLogDiagnostics(false), CCGenDiagnostics(false), @@ -1379,6 +1390,77 @@ Compilation *Driver::BuildCompilation(ArrayRef ArgList) { setLTOMode(Args); +#if defined(ENABLE_AUTOTUNER) + // Process -fautotune and -fautotune-generate flags. + bool IsAutoTuneGenerate = Args.hasArg(options::OPT_fautotune_generate, + options::OPT_fautotune_generate_EQ); + bool IsAutoTune = + Args.hasArg(options::OPT_fautotune, options::OPT_fautotune_EQ); + // Check if the environment variable AUTOTUNE_MODE is used instead of + // -fautotune-generate/-fautotune. + if (!IsAutoTuneGenerate && !IsAutoTune) { + if (std::optional MaybeMode = + llvm::sys::Process::GetEnv(AutoTuneModeStr)) { + StringRef Mode = *MaybeMode; + StringRef OrgMode = *MaybeMode; + if (Mode.consume_front("-fautotune-generate")) { + if (Mode.empty() || Mode.startswith("=")) + IsAutoTuneGenerate = true; + else + Diags.Report(diag::err_drv_autotune_incorrect_env) << OrgMode; + } else if (Mode.consume_front("-fautotune")) { + if (Mode.empty() || Mode.startswith("=")) + IsAutoTune = true; + else + Diags.Report(diag::err_drv_autotune_incorrect_env) << OrgMode; + } else { + Diags.Report(diag::err_drv_autotune_incorrect_env) << OrgMode; + } + + if (Mode.consume_front("=")) { + if (Mode.empty()) + Diags.Report(diag::err_drv_autotune_no_filter_types) + << (IsAutoTuneGenerate ? 
"-fautotune-generate=" : "-fautotune="); + + AutoTuneOptions = Mode.str(); + } + } + } + + IsMLTuningEnabled = Args.hasArg(options::OPT_fautotune_rank); + + if (IsAutoTuneGenerate && IsAutoTune) + Diags.Report(diag::err_drv_argument_not_allowed_with) + << "-fautotune" + << "-fautotune-generate"; + + if (IsMLTuningEnabled && !(IsAutoTuneGenerate || IsAutoTune)) + Diags.Report(diag::err_drv_argument_only_allowed_with) + << "-fautotune-rank" + << "-fautotune or -fautotune-generate"; + + if (IsAutoTuneGenerate || IsAutoTune) { + // Check if the environment variable AUTOTUNE_DATADIR is set. + if (std::optional MaybePath = + llvm::sys::Process::GetEnv("AUTOTUNE_DATADIR")) + AutoTuneDirDataPath = *MaybePath; + else + AutoTuneDirDataPath = "autotune_datadir"; + + // Check if the environment variable AUTOTUNE_PROJECT_DIR is set. + if (std::optional MaybeProjectDIR = + llvm::sys::Process::GetEnv(AutoTunePrjDirStr)) + AutoTuneProjectDir = *MaybeProjectDIR; + else + AutoTuneProjectDir = ""; + + if (IsAutoTuneGenerate) + AutoTuneMode = AutoTuneGenerate; + if (IsAutoTune) + AutoTuneMode = AutoTuneNext; + } +#endif + // Process -fembed-bitcode= flags. if (Arg *A = Args.getLastArg(options::OPT_fembed_bitcode_EQ)) { StringRef Name = A->getValue(); diff --git a/clang/lib/Driver/ToolChains/Clang.cpp b/clang/lib/Driver/ToolChains/Clang.cpp index 515c5e7239119b417f4f3241a00082ade4b6514d..ad1eae2452270a5fb8d85d7232d0b5646bcd24e7 100644 --- a/clang/lib/Driver/ToolChains/Clang.cpp +++ b/clang/lib/Driver/ToolChains/Clang.cpp @@ -5984,6 +5984,27 @@ void Clang::ConstructJob(Compilation &C, const JobAction &JA, if (!Triple.isNVPTX() && !Triple.isAMDGCN()) addPGOAndCoverageFlags(TC, C, JA, Output, Args, SanitizeArgs, CmdArgs); +#if defined(ENABLE_AUTOTUNER) + // Add Auto-tuning options. 
+ if (C.getDriver().isUsingAutoTune()) { + Arg *A = Args.getLastArg(options::OPT_O_Group); + if (!A || A->getOption().matches(options::OPT_O0)) + D.Diag(clang::diag::err_drv_autotune_disabled_O0); + + // Enable debug info when Auto-tuning options are specified. + CmdArgs.push_back("-debug-info-kind=line-tables-only"); + if (!D.AutoTuneProjectDir.empty()) { + CmdArgs.push_back("-mllvm"); + CmdArgs.push_back(Args.MakeArgString(Twine("-autotuning-project-dir=") + + D.AutoTuneProjectDir)); + } + if (D.getAutoTuneMode() == AutoTuneKind::AutoTuneGenerate) + AddAutoTuningOpportunities(Args, D, CmdArgs); + else if (D.getAutoTuneMode() == AutoTuneKind::AutoTuneNext) + AddAutoTuningInput(Args, D, CmdArgs); + } +#endif + Args.AddLastArg(CmdArgs, options::OPT_fclang_abi_compat_EQ); if (getLastProfileSampleUseArg(Args) && diff --git a/clang/lib/Driver/ToolChains/CommonArgs.cpp b/clang/lib/Driver/ToolChains/CommonArgs.cpp index 1ccc83a468ce6dcd6714bd45afe2aaa94a0494aa..e01b21e102b1143e39d52345840f11eb81f133bf 100644 --- a/clang/lib/Driver/ToolChains/CommonArgs.cpp +++ b/clang/lib/Driver/ToolChains/CommonArgs.cpp @@ -2429,6 +2429,119 @@ void tools::addMachineOutlinerArgs(const Driver &D, } } +#if defined(ENABLE_AUTOTUNER) +static bool isAcceptableThinLTOCodeRegion(StringRef CR) { + if ((CR.equals("CallSite") || CR.equals("Loop") || CR.equals("Function") || + CR.equals("MachineBasicBlock"))) + return false; + return true; +} + +static bool processOpportunitiesOptions(StringRef CR, bool IsThinLTO, + std::string &CodeRegionsFilterStr) { + // Check if the argument has a valid value. + if (!(CR.equals("Other") || CR.equals("LLVMParam") || CR.equals("CallSite") || + CR.equals("Function") || CR.equals("Loop") || + CR.equals("MachineBasicBlock") || CR.equals("Switch") || + CR.equals("ProgramParam"))) + return false; + + // Disable fine grain tuning for thin LTO during link time optimization. 
+ if (IsThinLTO && !isAcceptableThinLTOCodeRegion(CR)) { + llvm::errs() + << "error: fine-grained autotuning not supported in ThinLTO mode\n"; + return false; + } + + if (!CodeRegionsFilterStr.empty()) + CodeRegionsFilterStr += ','; + CodeRegionsFilterStr += CR; + return true; +} + +// Add AutoTuner options for generating tuning opportunities. +// IsThinLTO will only be true during link time optimization for -flto=thin. +void tools::AddAutoTuningOpportunities(const ArgList &Args, const Driver &D, + ArgStringList &CmdArgs, bool IsThinLTO) { + // Dump CodeRegions into opportunity files. + CmdArgs.push_back("-mllvm"); + SmallString<128> OppPath = StringRef(D.AutoTuneDirDataPath); + llvm::sys::path::append(OppPath, "opp"); + StringRef RawTypeFilterStr = D.AutoTuneOptions; + CmdArgs.push_back(Args.MakeArgString(Twine("-auto-tuning-opp=") + OppPath)); + if (D.IsMLTuningEnabled) { + // Baseline config is -1 + CmdArgs.push_back("-mllvm"); + CmdArgs.push_back(Args.MakeArgString(Twine("-auto-tuning-config-id=-1"))); + } + // Filter CodeRegions by type. + std::string CodeRegionsFilterStr; + if (Arg *A = Args.getLastArg(options::OPT_fautotune_generate_EQ)) { + for (StringRef CR : A->getValues()) { + if (!processOpportunitiesOptions(CR, IsThinLTO, CodeRegionsFilterStr)) + D.Diag(diag::err_drv_unsupported_option_argument) + << A->getOption().getName() << CR; + } + } else if (!RawTypeFilterStr.empty()) { + SmallVector TypeFilters; + RawTypeFilterStr.split(TypeFilters, ','); + for (StringRef CR : TypeFilters) { + if (!processOpportunitiesOptions(CR, IsThinLTO, CodeRegionsFilterStr)) + D.Diag(diag::err_drv_unsupported_option_argument) + << "fautotune-generate" << CR; + } + } else { + if (IsThinLTO) + D.Diag(diag::err_drv_autotune_generic) + << "AutoTuner: no valid code region type specified for ThinLTO mode"; + // Otherwise, by default dump CallSite, Function and Loop CodeRegions. 
+ CodeRegionsFilterStr = "CallSite,Function,Loop"; + } + CmdArgs.push_back("-mllvm"); + CmdArgs.push_back( + Args.MakeArgString("-auto-tuning-type-filter=" + CodeRegionsFilterStr)); +} + +static bool processInputOptions(StringRef Options, SmallString<128> &Path, + const ArgList &Args, const Driver &D, + llvm::opt::ArgStringList &CmdArgs) { + unsigned Value = 0; + // Check if the argument is an integer type. + if (Options.getAsInteger(10, Value)) + return false; + llvm::sys::path::append(Path, "config-" + Twine(Value) + ".yaml"); + if (D.IsMLTuningEnabled) { + CmdArgs.push_back("-mllvm"); + CmdArgs.push_back( + Args.MakeArgString(Twine("-auto-tuning-config-id=" + Twine(Value)))); + } + return true; +} + +void tools::AddAutoTuningInput(const ArgList &Args, const Driver &D, + llvm::opt::ArgStringList &CmdArgs) { + SmallString<128> InputPath = StringRef(D.AutoTuneDirDataPath); + StringRef RawOptionsStr = D.AutoTuneOptions; + + if (Arg *A = Args.getLastArg(options::OPT_fautotune_EQ)) { + if (!processInputOptions(StringRef(A->getValue()), InputPath, Args, D, + CmdArgs)) + D.Diag(diag::err_drv_invalid_int_value) + << A->getAsString(Args) << A->getValue(); + } else if (!RawOptionsStr.empty()) { + if (!processInputOptions(RawOptionsStr, InputPath, Args, D, CmdArgs)) + D.Diag(diag::err_drv_invalid_int_value) + << "-fautotune=" + RawOptionsStr.str() << RawOptionsStr; + } else { + llvm::sys::path::append(InputPath, "config.yaml"); + } + CmdArgs.push_back("-mllvm"); + CmdArgs.push_back( + Args.MakeArgString(Twine("-auto-tuning-input=") + InputPath)); + setenv("AUTOTUNE_INPUT", Args.MakeArgString(InputPath), 1); +} +#endif + void tools::addOpenMPDeviceRTL(const Driver &D, const llvm::opt::ArgList &DriverArgs, llvm::opt::ArgStringList &CC1Args, diff --git a/clang/lib/Driver/ToolChains/CommonArgs.h b/clang/lib/Driver/ToolChains/CommonArgs.h index bd5cb1bb866e030b8ae6a69a387025b8b0b1a1fc..36103655c52242a2b82717fc688e675611ab0735 100644 --- a/clang/lib/Driver/ToolChains/CommonArgs.h 
+++ b/clang/lib/Driver/ToolChains/CommonArgs.h @@ -240,6 +240,14 @@ void addMachineOutlinerArgs(const Driver &D, const llvm::opt::ArgList &Args, const llvm::Triple &Triple, bool IsLTO, const StringRef PluginOptPrefix = ""); +#if defined(ENABLE_AUTOTUNER) +void AddAutoTuningOpportunities(const llvm::opt::ArgList &Args, const Driver &D, + llvm::opt::ArgStringList &CmdArgs, + bool isThinLTO = false); +void AddAutoTuningInput(const llvm::opt::ArgList &Args, const Driver &D, + llvm::opt::ArgStringList &CmdArgs); +#endif + void addOpenMPDeviceRTL(const Driver &D, const llvm::opt::ArgList &DriverArgs, llvm::opt::ArgStringList &CC1Args, StringRef BitcodeSuffix, const llvm::Triple &Triple); diff --git a/clang/lib/Driver/ToolChains/Gnu.cpp b/clang/lib/Driver/ToolChains/Gnu.cpp index 8c9d0c0a3e73b53122266e76c5bd4081d6189402..dec2ceddb010d0a3f32c817e6821bb8e48165264 100644 --- a/clang/lib/Driver/ToolChains/Gnu.cpp +++ b/clang/lib/Driver/ToolChains/Gnu.cpp @@ -682,6 +682,40 @@ void tools::gnutools::Linker::ConstructJob(Compilation &C, const JobAction &JA, Args.AddAllArgs(CmdArgs, options::OPT_T); +#if defined(ENABLE_AUTOTUNER) + // AutoTuner related features will only be enabled for LTO build during + // linking phase. Otherwise, non LTO build will require lld linker + // unnecessarily (other linkers do not support AutoTuner). + if (D.isUsingAutoTune() && D.isUsingLTO()) { + bool LinkerIsLLD = false; + (void) ToolChain.GetLinkerPath(&LinkerIsLLD); + // AutoTuner support is only available for LLD Linker. + if (!LinkerIsLLD) + D.Diag(clang::diag::err_drv_lto_without_lld); + + bool IsThinLTO = D.getLTOMode() == LTOK_Thin; + if (!D.AutoTuneProjectDir.empty()) { + CmdArgs.push_back("-mllvm"); + CmdArgs.push_back(Args.MakeArgString(Twine("-autotuning-project-dir=") + + D.AutoTuneProjectDir)); + } + // Enable tuning of callsites cause all of the callsites will have local + // linkage during LTO and they are not tuned by default. 
+ CmdArgs.push_back(Args.MakeArgString("-mllvm")); + CmdArgs.push_back( + Args.MakeArgString("-auto-tuning-enable-local-callsite-tuning=true")); + if (D.getAutoTuneMode() == AutoTuneKind::AutoTuneGenerate) { + AddAutoTuningOpportunities(Args, D, CmdArgs, IsThinLTO); + } else if (D.getAutoTuneMode() == AutoTuneKind::AutoTuneNext) { + AddAutoTuningInput(Args, D, CmdArgs); + if (IsThinLTO) { + CmdArgs.push_back("-mllvm"); + CmdArgs.push_back("-autotuning-thin-lto=true"); + } + } + } +#endif + const char *Exec = Args.MakeArgString(ToolChain.GetLinkerPath()); C.addCommand(std::make_unique(JA, *this, ResponseFileSupport::AtFileCurCP(), diff --git a/clang/lib/FrontendTool/ExecuteCompilerInvocation.cpp b/clang/lib/FrontendTool/ExecuteCompilerInvocation.cpp index 310f67774a660503b8f6f21eac888c36a8f63424..92beeef9bd5e6fc589cc3905f10bcf4d87d4788d 100644 --- a/clang/lib/FrontendTool/ExecuteCompilerInvocation.cpp +++ b/clang/lib/FrontendTool/ExecuteCompilerInvocation.cpp @@ -222,6 +222,33 @@ bool ExecuteCompilerInvocation(CompilerInstance *Clang) { // This should happen AFTER plugins have been loaded! if (!Clang->getFrontendOpts().LLVMArgs.empty()) { unsigned NumArgs = Clang->getFrontendOpts().LLVMArgs.size(); +#if defined(ENABLE_AUTOTUNER) + // Both incremental compilation (for AutoTuner) and 'opt-bisect-limit' + // changes the behavior of compilation pipeline. If incremental compilation + // is used along with 'opt-bisect-limit' then 'opt-bisect-limit' is + // preferred and incremental compilation is disabled. 
+ unsigned BisectLimitFound = NumArgs; // NumArgs means "not found". + unsigned CompileModeFound = NumArgs; // Index 0 is a valid position. + for (unsigned Idx = 0; Idx != NumArgs; ++Idx) { + if (Clang->getFrontendOpts().LLVMArgs[Idx].find("-opt-bisect-limit=") != + std::string::npos) + BisectLimitFound = Idx; + if (Clang->getFrontendOpts().LLVMArgs[Idx].find( + "-auto-tuning-compile-mode=") != std::string::npos) + CompileModeFound = Idx; + if (BisectLimitFound != NumArgs && CompileModeFound != NumArgs) + break; + } + if (BisectLimitFound != NumArgs && CompileModeFound != NumArgs && + Clang->getFrontendOpts().LLVMArgs[CompileModeFound].compare( + "-auto-tuning-compile-mode=Inactive") != 0) { + Clang->getFrontendOpts().LLVMArgs[CompileModeFound] = + "-auto-tuning-compile-mode=Inactive"; + llvm::errs() << "AutoTunerCompile: Incremental compilation cannot work " + "with '-opt-bisect-limit' flag.\n" + "Disabling incremental compilation.\n"; + } +#endif auto Args = std::make_unique(NumArgs + 2); Args[0] = "clang (LLVM option parsing)"; for (unsigned i = 0; i != NumArgs; ++i) diff --git a/clang/test/Autotuning/BaselineConfig/Inputs/autotune_datadir/baseline-config.yaml b/clang/test/Autotuning/BaselineConfig/Inputs/autotune_datadir/baseline-config.yaml new file mode 100644 index 0000000000000000000000000000000000000000..a5e669c17a71bbade9008dea70948949eb86a090 --- /dev/null +++ b/clang/test/Autotuning/BaselineConfig/Inputs/autotune_datadir/baseline-config.yaml @@ -0,0 +1,9 @@ +!AutoTuning {Args: [{UnrollCount: 0}], CodeRegionHash: 12835463591102937421, + CodeRegionType: loop, Function: test, Invocation: 0, Name: for.body, + Pass: loop-unroll} +--- !AutoTuning {Args: [{VectorizationInterleave: 2}], + CodeRegionHash: 12835463591102937421, CodeRegionType: loop, Function: test, + Invocation: 0, Name: for.body, Pass: loop-vectorize} +--- !AutoTuning {Args: [{UnrollCount: 0}], CodeRegionHash: 8430337282115614432, + CodeRegionType: loop, Function: test, Invocation: 1, Name: vector.body, + Pass: loop-unroll} diff --git 
a/clang/test/Autotuning/BaselineConfig/Inputs/autotune_datadir/random-config.yaml b/clang/test/Autotuning/BaselineConfig/Inputs/autotune_datadir/random-config.yaml new file mode 100644 index 0000000000000000000000000000000000000000..b0c338d26a7cf06359067fbc349d0bf98f17129d --- /dev/null +++ b/clang/test/Autotuning/BaselineConfig/Inputs/autotune_datadir/random-config.yaml @@ -0,0 +1,9 @@ +!AutoTuning {Args: [{UnrollCount: 0}], CodeRegionHash: 12835463591102937421, + CodeRegionType: loop, Function: test, Invocation: 0, Name: for.body, + Pass: loop-unroll} +--- !AutoTuning {Args: [{VectorizationInterleave: 2}], + CodeRegionHash: 12835463591102937421, CodeRegionType: loop, Function: test, + Invocation: 0, Name: for.body, Pass: loop-vectorize} +--- !AutoTuning {Args: [{UnrollCount: 2}], CodeRegionHash: 8430337282115614432, + CodeRegionType: loop, Function: test, Invocation: 1, Name: vector.body, + Pass: loop-unroll} diff --git a/clang/test/Autotuning/BaselineConfig/apply-baseline-config.c b/clang/test/Autotuning/BaselineConfig/apply-baseline-config.c new file mode 100644 index 0000000000000000000000000000000000000000..b110d6756b26a3b5e5490e437dcfde7794c78cde --- /dev/null +++ b/clang/test/Autotuning/BaselineConfig/apply-baseline-config.c @@ -0,0 +1,32 @@ +// Check that the baseline IR is the same as the IR from the first iteration of +// the autotuning process with --use-baseline-config enabled. +// REQUIRES: ! 
host-x86_64 + +// RUN: rm -f %t.baseline %t.firstIt_baseline %t.firstIt_random +// RUN: %clang -O3 %s -c -o %t.baseline +// RUN: strip %t.baseline +// RUN: %clang -O3 %s -c -o %t.firstIt_baseline -mllvm \ +// RUN: -auto-tuning-input=%S/Inputs/autotune_datadir/baseline-config.yaml \ +// RUN: -mllvm -auto-tuning-omit-metadata +// RUN: strip %t.firstIt_baseline +// RUN: cmp %t.firstIt_baseline %t.baseline + +// RUN: %clang -O3 %s -c -o %t.firstIt_random -mllvm \ +// RUN: -auto-tuning-input=%S/Inputs/autotune_datadir/random-config.yaml \ +// RUN: -mllvm -auto-tuning-omit-metadata +// RUN: strip %t.firstIt_random +// RUN: not cmp %t.firstIt_random %t.baseline + +#include +#include + +void test() { + int cs = 128370 * 1024 / sizeof(double); + double *flush = (double *)calloc(cs, sizeof(double)); + int i; + double tmp = 0.0; + for (i = 0; i < cs; i++) + tmp += flush[i]; + assert(tmp <= 10.0); + free(flush); +} diff --git a/clang/test/Autotuning/Driver/Inputs/config.yaml b/clang/test/Autotuning/Driver/Inputs/config.yaml new file mode 100644 index 0000000000000000000000000000000000000000..6420c52c1a8a20bb9dd3afbc06b01253165d1bd0 --- /dev/null +++ b/clang/test/Autotuning/Driver/Inputs/config.yaml @@ -0,0 +1,3 @@ +!AutoTuning {Args: [{UnrollCount: 2}], CodeRegionType: loop, DebugLoc: { + Column: 5, File: clang/test/Autotuning/LoopDump/multiple_loops.c, Line: 21}, + Function: sum1d, Name: for.body, Pass: loop-unroll} diff --git a/clang/test/Autotuning/Driver/Inputs/template.yaml b/clang/test/Autotuning/Driver/Inputs/template.yaml new file mode 100644 index 0000000000000000000000000000000000000000..a98a7c758d5e7e504a5d5b80a64eea0dd8fb41b8 --- /dev/null +++ b/clang/test/Autotuning/Driver/Inputs/template.yaml @@ -0,0 +1,9 @@ +--- !AutoTuning +Pass: pass-name +Name: opp-name +Function: func +CodeRegionType: [type] +CodeRegionHash: 4250615281180658289 +Args: + - DummyOption: true +... 
diff --git a/clang/test/Autotuning/Driver/autotune-generate-pipeline.c b/clang/test/Autotuning/Driver/autotune-generate-pipeline.c new file mode 100644 index 0000000000000000000000000000000000000000..66276f11eb5783363ed5cc15966614fe06ec69b3 --- /dev/null +++ b/clang/test/Autotuning/Driver/autotune-generate-pipeline.c @@ -0,0 +1,146 @@ +// Verify if -fautotune-generate can be invoked properly + +// RUN: %clang -fautotune-generate -O1 --sysroot %S/Inputs/basic_cross_linux_tree -### %s 2>&1 | FileCheck \ +// RUN: %s --check-prefix=GENERATE-DEFAULT + +// RUN: AUTOTUNE_DATADIR=/tmp/test_autotune_datadir/ %clang -fautotune-generate -O1 --sysroot %S/Inputs/basic_cross_linux_tree -### %s 2>&1 | FileCheck \ +// RUN: %s --check-prefix=GENERATE-ENV-VAR + +// RUN: %clang -fautotune-generate -O1 -flto -fuse-ld=lld --sysroot %S/Inputs/basic_cross_linux_tree -### %s 2>&1 | FileCheck \ +// RUN: %s --check-prefix=GENERATE-LTO-DEFAULT + +// RUN: AUTOTUNE_DATADIR=/tmp/test_autotune_datadir/ %clang -fautotune-generate -O1 -flto -fuse-ld=lld --sysroot %S/Inputs/basic_cross_linux_tree -### %s 2>&1 | FileCheck \ +// RUN: %s --check-prefix=GENERATE-LTO-ENV-VAR + +// RUN: %clang -fautotune-generate -O0 --sysroot %S/Inputs/basic_cross_linux_tree -### %s 2>&1 | FileCheck \ +// RUN: %s --check-prefix=ERROR-O0 + +// RUN: %clang -fautotune-generate -O1 -flto --sysroot %S/Inputs/basic_cross_linux_tree -### %s 2>&1 | FileCheck \ +// RUN: %s --check-prefix=ERROR-LTO-WITHOUT-LLD + +// RUN: export AUTOTUNE_MODE=-fautotune-generate +// RUN: %clang -O1 --sysroot %S/Inputs/basic_cross_linux_tree -### %s 2>&1 | \ +// RUN: FileCheck %s --check-prefix=GENERATE-DEFAULT + +// RUN: %clang -fautotune-generate -O1 -c -flto %s -### \ +// RUN: --sysroot %S/Inputs/basic_cross_linux_tree 2>&1 | \ +// RUN: FileCheck %s --check-prefix=LTO-COMPILE-ONLY + +// RUN: %clang -fautotune-generate -O1 -c -flto %s -v -o %t.tmp.o 2>&1 | \ +// RUN: FileCheck %s --check-prefix=LTO-COMPILE-ONLY + +// RUN: not %clang 
-fautotune-generate -O1 -flto -v %t.tmp.o 2>&1 | \ +// RUN: FileCheck %s --check-prefix=ERROR-LTO-WITHOUT-LLD + +// RUN: %clang -fautotune-generate -O1 -flto -v %t.tmp.o -fuse-ld=lld 2>&1 | \ +// RUN: FileCheck %s --check-prefix=LTO-LINK-ONLY + +// RUN: export AUTOTUNE_MODE=-fautotune-generate +// RUN: AUTOTUNE_DATADIR=/tmp/test_autotune_datadir/ %clang -O1 --sysroot \ +// RUN: %S/Inputs/basic_cross_linux_tree -### %s 2>&1 | \ +// RUN: FileCheck %s --check-prefix=GENERATE-ENV-VAR + +// RUN: export AUTOTUNE_MODE=-fautotune-generate +// RUN: %clang -O1 -flto -fuse-ld=lld --sysroot %S/Inputs/basic_cross_linux_tree -### %s 2>&1 | \ +// RUN: FileCheck %s --check-prefix=GENERATE-LTO-DEFAULT + +// RUN: export AUTOTUNE_MODE=-fautotune-generate +// RUN: AUTOTUNE_DATADIR=/tmp/test_autotune_datadir/ %clang -O1 -flto -fuse-ld=lld \ +// RUN: --sysroot %S/Inputs/basic_cross_linux_tree -### %s 2>&1 | \ +// RUN: FileCheck %s --check-prefix=GENERATE-LTO-ENV-VAR + +// RUN: export AUTOTUNE_MODE=-fautotune-generate +// RUN: %clang -O0 --sysroot %S/Inputs/basic_cross_linux_tree -### %s 2>&1 | \ +// RUN: FileCheck %s --check-prefix=ERROR-O0 + +// RUN: export AUTOTUNE_MODE=-fautotune-generate +// RUN: %clang -O1 -flto --sysroot %S/Inputs/basic_cross_linux_tree -### %s 2>&1 | \ +// RUN: FileCheck %s --check-prefix=ERROR-LTO-WITHOUT-LLD + +// RUN: %clang -fautotune-generate -c -O1 -flto=thin -fuse-ld=lld --sysroot \ +// RUN: %S/Inputs/basic_cross_linux_tree -### %s 2>&1 | \ +// RUN: FileCheck %s --check-prefix=GENERATE-THIN-SUCCESS1 + +// RUN: %clang -fautotune-generate=LLVMParam -O1 -flto=thin -fuse-ld=lld \ +// RUN: --sysroot %S/Inputs/basic_cross_linux_tree -### %s 2>&1 | \ +// RUN: FileCheck %s --check-prefix=GENERATE-THIN-SUCCESS2 + +// RUN: %clang -fautotune-generate -O1 -flto=thin -fuse-ld=lld --sysroot \ +// RUN: %S/Inputs/basic_cross_linux_tree -### %s 2>&1 | \ +// RUN: FileCheck %s --check-prefixes=GENERATE-THIN-FAIL1 + +// RUN: %clang -fautotune-generate=Loop -O1 -flto=thin 
-fuse-ld=lld --sysroot \ +// RUN: %S/Inputs/basic_cross_linux_tree -### %s 2>&1 | \ +// RUN: FileCheck %s --check-prefixes=GENERATE-THIN-FAIL2 + +int main() { return 0; } + +// GENERATE-DEFAULT: {{clang.* "-cc1"}} +// GENERATE-DEFAULT-SAME: -debug-info-kind=line-tables-only +// GENERATE-DEFAULT-SAME: -auto-tuning-opp=autotune_datadir/opp +// GENERATE-DEFAULT-SAME: -auto-tuning-type-filter=CallSite,Function,Loop +// GENERATE-DEFAULT: ld +// GENERATE-DEFAULT-NOT: -auto-tuning-opp=autotune_datadir/opp +// GENERATE-DEFAULT-NOT: -auto-tuning-type-filter=CallSite,Function,Loop + +// GENERATE-ENV-VAR: {{clang.* "-cc1"}} +// GENERATE-ENV-VAR-SAME: -auto-tuning-opp=/tmp/test_autotune_datadir/opp +// GENERATE-ENV-VAR-SAME: -auto-tuning-type-filter=CallSite,Function,Loop + +// GENERATE-LTO-DEFAULT: {{clang.* "-cc1"}} +// GENERATE-LTO-DEFAULT-SAME: -debug-info-kind=line-tables-only +// GENERATE-LTO-DEFAULT: -auto-tuning-opp=autotune_datadir/opp +// GENERATE-LTO-DEFAULT: -auto-tuning-type-filter=CallSite,Function,Loop +// GENERATE-LTO-DEFAULT: ld.lld +// GENERATE-LTO-DEFAULT-SAME: -auto-tuning-opp=autotune_datadir/opp +// GENERATE-LTO-DEFAULT-SAME: -auto-tuning-type-filter=CallSite,Function,Loop + +// GENERATE-LTO-ENV-VAR: {{clang.* "-cc1"}} +// GENERATE-LTO-ENV-VAR: -auto-tuning-opp=/tmp/test_autotune_datadir/opp +// GENERATE-LTO-ENV-VAR: -auto-tuning-type-filter=CallSite,Function,Loop +// GENERATE-LTO-ENV-VAR: ld.lld +// GENERATE-LTO-ENV-VAR-SAME: -auto-tuning-opp=/tmp/test_autotune_datadir/opp +// GENERATE-LTO-ENV-VAR-SAME: -auto-tuning-type-filter=CallSite,Function,Loop + +// ERROR-O0: error: -fautotune/-fautotune-generate should not be enabled at -O0 + +// ERROR-LTO-WITHOUT-LLD: error: LTO requires -fuse-ld=lld + +// GENERATE-THIN-SUCCESS1-NOT: error +// GENERATE-THIN-SUCCESS1: {{clang.* "-cc1"}} +// GENERATE-THIN-SUCCESS1-SAME: -auto-tuning-type-filter=CallSite,Function,Loop +// GENERATE-THIN-SUCCESS1-NOT: "{{.*}}ld.lld" + +// GENERATE-THIN-SUCCESS2-NOT: error: +// 
GENERATE-THIN-SUCCESS2: {{clang.* "-cc1"}} +// GENERATE-THIN-SUCCESS2-SAME: -auto-tuning-type-filter=LLVMParam +// GENERATE-THIN-SUCCESS2: "{{.*}}ld.lld" +// GENERATE-THIN-SUCCESS2-SAME: -auto-tuning-type-filter=LLVMParam + +// GENERATE-THIN-FAIL1: error: AutoTuner: no valid code region type specified +// GENERATE-THIN-FAIL1-SAME: for ThinLTO mode +// GENERATE-THIN-FAIL1: {{clang.* "-cc1"}} +// GENERATE-THIN-FAIL1-SAME: -auto-tuning-type-filter=CallSite,Function,Loop +// GENERATE-THIN-FAIL1: "{{.*}}ld.lld" +// GENERATE-THIN-FAIL1-SAME: -auto-tuning-type-filter=CallSite,Function,Loop + +// GENERATE-THIN-FAIL2: error: fine-grained autotuning not supported in ThinLTO +// GENERATE-THIN-FAIL2-SAME: mode +// GENERATE-THIN-FAIL2-NEXT: error: unsupported argument 'Loop' to option +// GENERATE-THIN-FAIL2-SAME: 'fautotune-generate=' +// GENERATE-THIN-FAIL2: {{clang.* "-cc1"}} +// GENERATE-THIN-FAIL2-SAME: -auto-tuning-type-filter=Loop +// GENERATE-THIN-FAIL2: "{{.*}}ld.lld" +// GENERATE-THIN-FAIL2-SAME: -auto-tuning-type-filter= + +// LTO-COMPILE-ONLY: {{clang.*-cc1}} +// LTO-COMPILE-ONLY-SAME: -debug-info-kind=line-tables-only +// LTO-COMPILE-ONLY-SAME: -auto-tuning-opp=autotune_datadir/opp +// LTO-COMPILE-ONLY-SAME: -auto-tuning-type-filter=CallSite,Function,Loop +// LTO-COMPILE-ONLY-NOT: ld.lld +// LTO-COMPILE-ONLY-NOT: error: LTO requires -fuse-ld=lld + +// LTO-LINK-ONLY-NOT: {{clang.*-cc1}} +// LTO-LINK-ONLY: ld.lld +// LTO-LINK-ONLY-SAME: -auto-tuning-opp=autotune_datadir/opp +// LTO-LINK-ONLY-SAME: -auto-tuning-type-filter=CallSite,Function,Loop diff --git a/clang/test/Autotuning/Driver/autotune-pipeline-thin-lto.c b/clang/test/Autotuning/Driver/autotune-pipeline-thin-lto.c new file mode 100644 index 0000000000000000000000000000000000000000..2ecc40e6f8d912bde7a0cc7403c30331814ffe7c --- /dev/null +++ b/clang/test/Autotuning/Driver/autotune-pipeline-thin-lto.c @@ -0,0 +1,42 @@ +// REQUIRES: asserts + +// RUN: rm -rf %t.data_dir +// RUN: export 
AUTOTUNE_DATADIR=%t.data_dir +// RUN: mkdir $AUTOTUNE_DATADIR + +// RUN: sed 's#\[type\]#loop#g' %S/Inputs/template.yaml > \ +// RUN: %t.data_dir/config.yaml +// RUN: %clang -O3 %s -flto=thin -fautotune -mllvm -debug-only=autotuning \ +// RUN: -fuse-ld=lld -Wl,-mllvm,-debug-only=autotuning 2>&1 | \ +// RUN: FileCheck %s --check-prefix=AUTOTUNE-CR-LOOP + +// RUN: sed 's#\[type\]#llvm-param#g' %S/Inputs/template.yaml > \ +// RUN: %t.data_dir/config.yaml +// RUN: %clang -O3 %s -flto=thin -fautotune -mllvm -debug-only=autotuning \ +// RUN: -fuse-ld=lld -Wl,-mllvm,-debug-only=autotuning 2>&1 | \ +// RUN: FileCheck %s --check-prefix=AUTOTUNE-CR-PARAM + +#include <assert.h> +#include <stdlib.h> + +int main() { + int cs = 128370 * 1024 / sizeof(double); + double *flush = (double *)calloc(cs, sizeof(double)); + int i; + double tmp = 0.0; + for (i = 0; i < cs; i++) + tmp += flush[i]; + assert(tmp <= 10.0); + free(flush); + return tmp; +} + +// AUTOTUNE-CR-LOOP-NOT: AutoTuner does not support tuning of {{.*}} thinLTO +// AUTOTUNE-CP-LOOP-NEXT: AutoTuningEngine is initialized. +// AUTOTUNE-CR-LOOP: AutoTuner does not support tuning of {{.*}} thinLTO +// AUTOTUNE-CP-LOOP-NEXT: AutoTuningEngine is initialized. + +// AUTOTUNE-CR-PARAM-NOT: AutoTuner does not support tuning of {{.*}} thinLTO +// AUTOTUNE-CP-PARAM-NEXT: AutoTuningEngine is initialized. +// AUTOTUNE-CR-PARAM-NOT: AutoTuner does not support tuning of {{.*}} thinLTO +// AUTOTUNE-CP-PARAM-NEXT: AutoTuningEngine is initialized. 
diff --git a/clang/test/Autotuning/Driver/autotune-pipeline.c b/clang/test/Autotuning/Driver/autotune-pipeline.c new file mode 100644 index 0000000000000000000000000000000000000000..2247c9cf5bd3855f569062d02dcb138560254f5f --- /dev/null +++ b/clang/test/Autotuning/Driver/autotune-pipeline.c @@ -0,0 +1,131 @@ +// Verify if -fautotune can be invoked properly + +// RUN: %clang -fautotune -O1 --sysroot %S/Inputs/basic_cross_linux_tree -### %s 2>&1 | FileCheck \ +// RUN: %s --check-prefix=AUTOTUNE-DEFAULT + +// RUN: %clang -fautotune=0 -O1 --sysroot %S/Inputs/basic_cross_linux_tree -### %s 2>&1 | FileCheck \ +// RUN: %s --check-prefix=AUTOTUNE-DEFAULT-ID0 + +// RUN: AUTOTUNE_DATADIR=/tmp/test_autotune_datadir/ %clang -fautotune=1 -O1 --sysroot %S/Inputs/basic_cross_linux_tree -### %s 2>&1 | FileCheck \ +// RUN: %s --check-prefix=AUTOTUNE-ENV-VAR-ID1 + +// RUN: %clang -fautotune -O1 -flto -fuse-ld=lld --sysroot %S/Inputs/basic_cross_linux_tree -### %s 2>&1 | FileCheck \ +// RUN: %s --check-prefix=AUTOTUNE-LTO-DEFAULT + +// RUN: AUTOTUNE_DATADIR=/tmp/test_autotune_datadir/ %clang -fautotune -O1 -flto -fuse-ld=lld --sysroot %S/Inputs/basic_cross_linux_tree -### %s 2>&1 | FileCheck \ +// RUN: %s --check-prefix=AUTOTUNE-LTO-ENV-VAR + +// RUN: %clang -fautotune -O0 --sysroot %S/Inputs/basic_cross_linux_tree -### %s 2>&1 | FileCheck \ +// RUN: %s --check-prefix=ERROR-O0 + +// RUN: %clang -fautotune -O1 -flto --sysroot %S/Inputs/basic_cross_linux_tree -### %s 2>&1 | FileCheck \ +// RUN: %s --check-prefix=ERROR-LTO-WITHOUT-LLD + +// RUN: %clang -fautotune=test -O1 --sysroot %S/Inputs/basic_cross_linux_tree -### %s 2>&1 | FileCheck \ +// RUN: %s --check-prefix=ERROR-NON-INTEGER-ID +// RUN: %clang -fautotune -O1 -c -flto %s -### \ +// RUN: --sysroot %S/Inputs/basic_cross_linux_tree 2>&1 | \ +// RUN: FileCheck %s --check-prefix=LTO-COMPILE-ONLY + +// RUN: mkdir -p %T.tmp/Output +// RUN: cp %S/Inputs/config.yaml %T.tmp/Output +// RUN: env AUTOTUNE_DATADIR=%T.tmp/Output \ +// RUN: 
%clang -fautotune -O1 -c -flto %s -v -o %t.tmp.o 2>&1 | \ +// RUN: FileCheck %s --check-prefix=LTO-COMPILE-ONLY + +// RUN: env AUTOTUNE_DATADIR=%T.tmp/Output \ +// RUN: not %clang -fautotune -O1 -flto -v %t.tmp.o 2>&1 | \ +// RUN: FileCheck %s --check-prefix=ERROR-LTO-WITHOUT-LLD + +// RUN: env AUTOTUNE_DATADIR=%T.tmp/Output \ +// RUN: %clang -fautotune -O1 -flto -v %t.tmp.o -fuse-ld=lld 2>&1 | \ +// RUN: FileCheck %s --check-prefix=LTO-LINK-ONLY + +// RUN: export AUTOTUNE_MODE=-fautotune +// RUN: %clang -O1 --sysroot %S/Inputs/basic_cross_linux_tree -### %s 2>&1 | \ +// RUN: FileCheck %s --check-prefix=AUTOTUNE-DEFAULT + +// RUN: export AUTOTUNE_MODE=-fautotune=0 +// RUN: %clang -O1 --sysroot %S/Inputs/basic_cross_linux_tree -### %s 2>&1 | \ +// RUN: FileCheck %s --check-prefix=AUTOTUNE-DEFAULT-ID0 + +// RUN: export AUTOTUNE_MODE=-fautotune=1 +// RUN: AUTOTUNE_DATADIR=/tmp/test_autotune_datadir/ %clang -O1 --sysroot \ +// RUN: %S/Inputs/basic_cross_linux_tree -### %s 2>&1 | \ +// RUN: FileCheck %s --check-prefix=AUTOTUNE-ENV-VAR-ID1 + +// RUN: export AUTOTUNE_MODE=-fautotune +// RUN: %clang -O1 -flto -fuse-ld=lld --sysroot %S/Inputs/basic_cross_linux_tree -### %s 2>&1 | \ +// RUN: FileCheck %s --check-prefix=AUTOTUNE-LTO-DEFAULT + +// RUN: export AUTOTUNE_MODE=-fautotune +// RUN: AUTOTUNE_DATADIR=/tmp/test_autotune_datadir/ %clang -O1 -flto -fuse-ld=lld \ +// RUN: --sysroot %S/Inputs/basic_cross_linux_tree -### %s 2>&1 | \ +// RUN: FileCheck %s --check-prefix=AUTOTUNE-LTO-ENV-VAR + +// RUN: export AUTOTUNE_MODE=-fautotune +// RUN: %clang -O0 --sysroot %S/Inputs/basic_cross_linux_tree -### %s 2>&1 | \ +// RUN: FileCheck %s --check-prefix=ERROR-O0 + +// RUN: export AUTOTUNE_MODE=-fautotune +// RUN: %clang -O1 -flto --sysroot %S/Inputs/basic_cross_linux_tree -### %s 2>&1 | \ +// RUN: FileCheck %s --check-prefix=ERROR-LTO-WITHOUT-LLD + +// RUN: export AUTOTUNE_MODE=-fautotune=test +// RUN: %clang -O1 --sysroot %S/Inputs/basic_cross_linux_tree -### %s 2>&1 | \ +// RUN: 
FileCheck %s --check-prefix=ERROR-NON-INTEGER-ID + +// RUN: env AUTOTUNE_MODE=-fautotune \ +// RUN: %clang -O1 --sysroot %S/Inputs/basic_cross_linux_tree -### %s \ +// RUN: -flto=thin -fuse-ld=lld 2>&1 | \ +// RUN: FileCheck %s --check-prefix=AUTOTUNE-THIN-DEFAULT + +int main() { return 0; } + +// AUTOTUNE-DEFAULT: {{clang.* "-cc1"}} +// AUTOTUNE-DEFAULT-SAME: -debug-info-kind=line-tables-only +// AUTOTUNE-DEFAULT-SAME: -auto-tuning-input=autotune_datadir/config.yaml +// AUTOTUNE-DEFAULT: ld +// AUTOTUNE-DEFAULT-NOT: -auto-tuning-opp=autotune_datadir/opp +// AUTOTUNE-DEFAULT-NOT: -auto-tuning-type-filter=CallSite,Function,Loop + +// AUTOTUNE-ENV-VAR: {{clang.* "-cc1"}} +// AUTOTUNE-ENV-VAR-SAME: -auto-tuning-input=/tmp/test_autotune_datadir/config.yaml + +// AUTOTUNE-DEFAULT-ID0: {{clang.* "-cc1"}} +// AUTOTUNE-DEFAULT-ID0-SAME: -auto-tuning-input=autotune_datadir/config-0.yaml + +// AUTOTUNE-ENV-VAR-ID1: {{clang.* "-cc1"}} +// AUTOTUNE-ENV-VAR-ID1-SAME: -auto-tuning-input=/tmp/test_autotune_datadir/config-1.yaml + +// AUTOTUNE-LTO-DEFAULT: {{clang.* "-cc1"}} +// AUTOTUNE-LTO-DEFAULT-SAME: -debug-info-kind=line-tables-only +// AUTOTUNE-LTO-DEFAULT-SAME: -auto-tuning-input=autotune_datadir/config.yaml +// AUTOTUNE-LTO-DEFAULT: ld.lld +// AUTOTUNE-LTO-DEFAULT-SAME: -auto-tuning-input=autotune_datadir/config.yaml + +// AUTOTUNE-LTO-ENV-VAR: {{clang.* "-cc1"}} +// AUTOTUNE-LTO-ENV-VAR-SAME: -auto-tuning-input=/tmp/test_autotune_datadir/config.yaml +// AUTOTUNE-LTO-ENV-VAR: ld.lld +// AUTOTUNE-LTO-ENV-VAR-SAME: -auto-tuning-input=/tmp/test_autotune_datadir/config.yaml + +// ERROR-O0: error: -fautotune/-fautotune-generate should not be enabled at -O0 +// ERROR-LTO-WITHOUT-LLD: error: LTO requires -fuse-ld=lld +// ERROR-NON-INTEGER-ID: error: invalid integral value 'test' in '-fautotune=test' + +// AUTOTUNE-THIN-DEFAULT: {{clang.* "-cc1"}} +// AUTOTUNE-THIN-DEFAULT-SAME: -debug-info-kind=line-tables-only +// AUTOTUNE-THIN-DEFAULT-SAME: 
-auto-tuning-input=autotune_datadir/config.yaml +// AUTOTUNE-THIN-DEFAULT: "{{.*}}ld.lld" +// AUTOTUNE-THIN-DEFAULT-SAME: -auto-tuning-input=autotune_datadir/config.yaml +// AUTOTUNE-THIN-DEFAULT-SAME: -autotuning-thin-lto=true + +// LTO-COMPILE-ONLY: {{clang.*-cc1}} +// LTO-COMPILE-ONLY-SAME: -debug-info-kind=line-tables-only +// LTO-COMPILE-ONLY-SAME: -auto-tuning-input={{.*}}/config.yaml +// LTO-COMPILE-ONLY-NOT: ld.lld +// LTO-COMPILE-ONLY-NOT: error: LTO requires -fuse-ld=lld + +// LTO-LINK-ONLY-NOT: {{clang.*-cc1}} +// LTO-LINK-ONLY: ld.lld +// LTO-LINK-ONLY-SAME: -auto-tuning-input={{.*}}/config.yaml diff --git a/clang/test/Autotuning/GenerateOpp/generate.cpp b/clang/test/Autotuning/GenerateOpp/generate.cpp new file mode 100644 index 0000000000000000000000000000000000000000..e6f5a3e986f4ce3cd7f5cc46db2ed8870e501a50 --- /dev/null +++ b/clang/test/Autotuning/GenerateOpp/generate.cpp @@ -0,0 +1,25 @@ +// RUN: rm -rf %t.other +// RUN: export AUTOTUNE_DATADIR=%t.other + +// Test coarse-grain code region generation process and 'Name' field have +// complete path. +// RUN: %clang %s -S -O3 -fautotune-generate=Other -o - +// RUN: grep "Name: \+'%S/generate.cpp'" %t.other/opp/generate.cpp.yaml +// RUN: not grep "Name: \+generate.cpp" %t.other/opp/generate.cpp.yaml + +// Use environment variable 'AUTOTUNE_PROJECT_DIR' to truncate the complete +// prefix and only use filename as 'Name' field for code region. +// RUN: rm -rf %t.other +// RUN: export AUTOTUNE_PROJECT_DIR=%S/ +// RUN: %clang %s -S -O3 -fautotune-generate=Other -o - +// RUN: not grep "Name: \+'%S/generate.cpp'" %t.other/opp/generate.cpp.yaml +// RUN: grep "Name: \+generate.cpp" %t.other/opp/generate.cpp.yaml + +// A simple cpp file. 
+int main() { + int i = 8; + for (; i < 20;) { + int a = i - 5; + i = i + 2; + } +} diff --git a/clang/test/Autotuning/IncrementalCompilation/Inputs/template.yaml b/clang/test/Autotuning/IncrementalCompilation/Inputs/template.yaml new file mode 100644 index 0000000000000000000000000000000000000000..a7d390be63e73072f866117269b707d8b1734b93 --- /dev/null +++ b/clang/test/Autotuning/IncrementalCompilation/Inputs/template.yaml @@ -0,0 +1,9 @@ +--- !AutoTuning +Pass: [dummy-pass] +CodeRegionType: [dummy-type] +Name: foo +DebugLoc: { File: [dummy-file], Line: 0, Column: 0 } +Function: foo +CodeRegionHash: 0 +Invocation: 0 +... diff --git a/clang/test/Autotuning/IncrementalCompilation/Inputs/test1.c b/clang/test/Autotuning/IncrementalCompilation/Inputs/test1.c new file mode 100644 index 0000000000000000000000000000000000000000..4aae331b262e4ec567c8653ed4b8601a4461bdc1 --- /dev/null +++ b/clang/test/Autotuning/IncrementalCompilation/Inputs/test1.c @@ -0,0 +1,3 @@ +// No AutoTuning opportunity. + +void test() { return; } \ No newline at end of file diff --git a/clang/test/Autotuning/IncrementalCompilation/Inputs/test2.c b/clang/test/Autotuning/IncrementalCompilation/Inputs/test2.c new file mode 100644 index 0000000000000000000000000000000000000000..a32fefaf5c1b5f13d184dfb5991b5cef5871957d --- /dev/null +++ b/clang/test/Autotuning/IncrementalCompilation/Inputs/test2.c @@ -0,0 +1,17 @@ +// Inlining opportunity. 
+ +int mul(int a) { return a * a; } + +int add(int a) { return a + a; } + +int inc(int a) { return ++a; } + +int func(int a) { + int x = add(a); + int y = mul(a); + int z = x + y; + + z += inc(a); + + return z; +} diff --git a/clang/test/Autotuning/IncrementalCompilation/Inputs/test3.c b/clang/test/Autotuning/IncrementalCompilation/Inputs/test3.c new file mode 100644 index 0000000000000000000000000000000000000000..f0257e49c33fd1ddd6781874bedf71add68d3328 --- /dev/null +++ b/clang/test/Autotuning/IncrementalCompilation/Inputs/test3.c @@ -0,0 +1,6 @@ +// Loop unrolling opportunity. + +void func3(int *a, int size) { + for (int i = 0; i < size; i++) + a[i]++; +} \ No newline at end of file diff --git a/clang/test/Autotuning/IncrementalCompilation/inc-compile-generate-input.cpp b/clang/test/Autotuning/IncrementalCompilation/inc-compile-generate-input.cpp new file mode 100644 index 0000000000000000000000000000000000000000..54be8d2c5e63dc6314112366ea74a3cbc94b5487 --- /dev/null +++ b/clang/test/Autotuning/IncrementalCompilation/inc-compile-generate-input.cpp @@ -0,0 +1,44 @@ +// REQUIRES: asserts + +// RUN: %clang -O3 -c -fautotune-generate=LLVMParam %S/Inputs/test1.c \ +// RUN: -mllvm -auto-tuning-compile-mode=CoarseGrain \ +// RUN: -mllvm -debug-only=autotuning-compile 2>&1 | \ +// RUN: FileCheck %s -check-prefix=COARSEGRAIN +// RUN: rm -rf %S/Inputs/test1.ll + +// RUN: %clang -O3 -c -fautotune-generate %S/Inputs/test1.c \ +// RUN: -mllvm -auto-tuning-compile-mode=FineGrain \ +// RUN: -mllvm -debug-only=autotuning-compile 2>&1 | \ +// RUN: FileCheck %s -check-prefix=FINEGRAIN-NO-OPP +// RUN: rm -rf %S/Inputs/test1.ll + +// RUN: %clang -O3 -c -fautotune-generate %S/Inputs/test2.c \ +// RUN: -mllvm -auto-tuning-compile-mode=FineGrain \ +// RUN: -mllvm -debug-only=autotuning-compile 2>&1 | \ +// RUN: FileCheck %s -check-prefix=FINEGRAIN-INLINE +// RUN: rm -rf %S/Inputs/test2.ll + +// RUN: %clang -O3 -c -fautotune-generate %S/Inputs/test3.c \ +// RUN: -mllvm 
-auto-tuning-compile-mode=FineGrain \ +// RUN: -mllvm -debug-only=autotuning-compile 2>&1 | \ +// RUN: FileCheck %s -check-prefix=FINEGRAIN-UNROLL +// RUN: rm -rf %S/Inputs/test3.ll + +// COARSEGRAIN: AutoTuningCompile: IR files writing before Pass: start. + +// FINEGRAIN-NO-OPP: AutoTuningCompile: IR files writing before Pass: start. +// FINEGRAIN-NO-OPP-NEXT: AutoTuningCompile: IR files writing before +// FINEGRAIN-NO-OPP-SAME: Pass: inline. +// FINEGRAIN-NO-OPP: AutoTuningCompile: IR files writing before +// FINEGRAIN-NO-OPP-SAME: Pass: loop-vectorize. +// FINEGRAIN-NO-OPP-NEXT: AutoTuningCompile: IR files writing before Pass: end. + +// FINEGRAIN-INLINE: AutoTuningCompile: IR files writing before Pass: start. +// FINEGRAIN-INLINE-NEXT: AutoTuningCompile: IR files writing before +// FINEGRAIN-INLINE-SAME: Pass: inline. + +// FINEGRAIN-UNROLL: AutoTuningCompile: IR files writing before Pass: start. +// FINEGRAIN-UNROLL-NEXT: AutoTuningCompile: IR files writing before +// FINEGRAIN-UNROLL-SAME: Pass: inline. +// FINEGRAIN-UNROLL-NEXT: AutoTuningCompile: IR files writing before +// FINEGRAIN-UNROLL-SAME: Pass: loop-unroll. 
diff --git a/clang/test/Autotuning/LTO/Inputs/datadir/corse_grain_config.yaml b/clang/test/Autotuning/LTO/Inputs/datadir/corse_grain_config.yaml new file mode 100644 index 0000000000000000000000000000000000000000..0a8e9ee2cce1d99ee7b8016d3cd8f19271ac340e --- /dev/null +++ b/clang/test/Autotuning/LTO/Inputs/datadir/corse_grain_config.yaml @@ -0,0 +1 @@ +!AutoTuning {Args: [{-unroll-count: 8}], CodeRegionHash: 0, CodeRegionType: llvm-param, Function: none, Invocation: 0, Name: [module], Pass: none} diff --git a/clang/test/Autotuning/LTO/Inputs/datadir/fine_grain_a.out.yaml b/clang/test/Autotuning/LTO/Inputs/datadir/fine_grain_a.out.yaml new file mode 100644 index 0000000000000000000000000000000000000000..d0f8a86abba50c93b4681913f3332ba48313936d --- /dev/null +++ b/clang/test/Autotuning/LTO/Inputs/datadir/fine_grain_a.out.yaml @@ -0,0 +1,4 @@ +!AutoTuning {Args: [{ForceInline: 1}], CodeRegionHash: 795632090418537900, CodeRegionType: callsite, DebugLoc: {Column: 3, File: clang/test/Autotuning/LTO/Inputs/src/test.c, Line: 15}, Function: main, Invocation: 0, Name: input_data-, Pass: inline} +--- !AutoTuning {Args: [{UnrollCount: 4}], CodeRegionHash: 13976873016266268884, CodeRegionType: loop, DebugLoc: {Column: 3, File: clang/test/Autotuning/LTO/Inputs/src/test.c, Line: 7}, Function: main, Invocation: 1, Name: label %39, Pass: loop-unroll} +--- !AutoTuning {Args: [{UnrollCount: 1}], CodeRegionHash: 6932431056461711356, CodeRegionType: loop, DebugLoc: {Column: 3, File: clang/test/Autotuning/LTO/Inputs/src/test.c, Line: 7}, Function: main, Invocation: 1, Name: label %25, Pass: loop-unroll} +--- !AutoTuning {Args: [{UnrollCount: 1}], CodeRegionHash: 14763110312986404579, CodeRegionType: loop, DebugLoc: {Column: 3, File: clang/test/Autotuning/LTO/Inputs/src/input.c, Line: 5}, Function: main, Invocation: 1, Name: label %12, Pass: loop-unroll} diff --git a/clang/test/Autotuning/LTO/Inputs/datadir/fine_grain_output.yaml 
b/clang/test/Autotuning/LTO/Inputs/datadir/fine_grain_output.yaml new file mode 100644 index 0000000000000000000000000000000000000000..937ed95ad5fa648c1eaa97aae142527d8fe4331d --- /dev/null +++ b/clang/test/Autotuning/LTO/Inputs/datadir/fine_grain_output.yaml @@ -0,0 +1 @@ +!AutoTuning {Args: [{UnrollCount: 8}], CodeRegionHash: 1116845059539910783, CodeRegionType: loop, DebugLoc: {Column: 3, File: clang/test/Autotuning/LTO/Inputs/src/output.c, Line: 5}, Function: output_data, Invocation: 1, Name: for.body, Pass: loop-unroll} diff --git a/clang/test/Autotuning/LTO/Inputs/src/input.c b/clang/test/Autotuning/LTO/Inputs/src/input.c new file mode 100644 index 0000000000000000000000000000000000000000..2511e80d3930c0eb3be3b6e87e80ba71949ae2bd --- /dev/null +++ b/clang/test/Autotuning/LTO/Inputs/src/input.c @@ -0,0 +1,7 @@ +#include <stdio.h> + +void input_data(int Arr[], int NI) { + printf("Input data...\n"); + for (int I = 0; I < NI; ++I) + scanf("%d", &Arr[I]); +} diff --git a/clang/test/Autotuning/LTO/Inputs/src/input.h b/clang/test/Autotuning/LTO/Inputs/src/input.h new file mode 100644 index 0000000000000000000000000000000000000000..eed043181f2e12f3edd163f94a8f500feb72c3e8 --- /dev/null +++ b/clang/test/Autotuning/LTO/Inputs/src/input.h @@ -0,0 +1 @@ +void input_data(int Arr[], int NI); diff --git a/clang/test/Autotuning/LTO/Inputs/src/output.c b/clang/test/Autotuning/LTO/Inputs/src/output.c new file mode 100644 index 0000000000000000000000000000000000000000..d8ff68f8dd01b10ef095882b2c1810721eab036f --- /dev/null +++ b/clang/test/Autotuning/LTO/Inputs/src/output.c @@ -0,0 +1,8 @@ +#include <stdio.h> + +void output_data(int *Arr, int NI) { + printf("Printing data...\n"); + for (int I = 0; I < NI; ++I) + printf("%d\t", Arr[I]); + printf("\nPrinting done...\n"); +} diff --git a/clang/test/Autotuning/LTO/Inputs/src/output.h b/clang/test/Autotuning/LTO/Inputs/src/output.h new file mode 100644 index 0000000000000000000000000000000000000000..1c5aa0b119f43012ced82d5d6e1809a24226c9f5 --- 
/dev/null +++ b/clang/test/Autotuning/LTO/Inputs/src/output.h @@ -0,0 +1 @@ +void output_data(int *Arr, int NI); diff --git a/clang/test/Autotuning/LTO/Inputs/src/test.c b/clang/test/Autotuning/LTO/Inputs/src/test.c new file mode 100644 index 0000000000000000000000000000000000000000..5d22f358505f2f3e69db5bb522d902b6052d8269 --- /dev/null +++ b/clang/test/Autotuning/LTO/Inputs/src/test.c @@ -0,0 +1,19 @@ +#include "input.h" +#include "output.h" +#include <stdio.h> +#include <stdlib.h> + +void inc_data(int Arr[], int Size) { + for (int I = 0; I < Size; ++I) + Arr[I] = Arr[I] + 1; +} + +int main(int argc, char **argv) { + int NI = atoi(argv[1]); + int Arr[NI]; + + input_data(Arr, NI); + inc_data(Arr, NI); + output_data(Arr, NI); + return 0; +} diff --git a/clang/test/Autotuning/LTO/apply_config_coarse_grain.cpp b/clang/test/Autotuning/LTO/apply_config_coarse_grain.cpp new file mode 100644 index 0000000000000000000000000000000000000000..353f875c001a813c918a42726b87f4b50bfc069b --- /dev/null +++ b/clang/test/Autotuning/LTO/apply_config_coarse_grain.cpp @@ -0,0 +1,41 @@ +// REQUIRES: asserts + +// RUN: rm -rf %t.data_dir +// RUN: export AUTOTUNE_DATADIR=%t.data_dir +// RUN: mkdir $AUTOTUNE_DATADIR + +// RUN: sed 's#\[module\]#%S/Inputs/src/input.c#g' \ +// RUN: %S/Inputs/datadir/corse_grain_config.yaml > %t.data_dir/config.yaml + +// RUN: %clang -O3 -c -flto=full -fuse-ld=lld -fautotune %S/Inputs/src/output.c \ +// RUN: -o %t.output.o -mllvm -debug-only=loop-unroll -mllvm -unroll-count=0 \ +// RUN: 2>&1 | FileCheck %s --check-prefix=COARSEGRAIN1 + +// RUN: %clang -O3 -c -flto=full -fuse-ld=lld -fautotune %S/Inputs/src/input.c \ +// RUN: -o %t.input.o -mllvm -debug-only=loop-unroll 2>&1 | \ +// RUN: FileCheck %s --check-prefix=COARSEGRAIN2 + +// RUN: %clang -O3 -c -flto=full -fuse-ld=lld -fautotune %S/Inputs/src/test.c \ +// RUN: -o %t.test.o -mllvm -debug-only=loop-unroll -mllvm -unroll-count=0 \ +// RUN: 2>&1 | FileCheck %s --check-prefix=COARSEGRAIN1 + +// RUN: sed 's#\[module\]#a.out#g' 
%S/Inputs/datadir/corse_grain_config.yaml \ +// RUN: > %t.data_dir/config.yaml + +// RUN: env AUTOTUNE_PROJECT_DIR=$(echo -n %T | sed 's\Output\\') \ +// RUN: %clang -O3 -fautotune %t.output.o %t.input.o %t.test.o \ +// RUN: -flto=full -fuse-ld=lld -Wl,-mllvm,-debug-only=loop-unroll 2>&1 |\ +// RUN: FileCheck %s --check-prefix=COARSEGRAIN3 + +// COARSEGRAIN1-NOT: UNROLLING loop + +// COARSEGRAIN2: Loop Unroll: F[input_data] Loop [[NAME:%for.body]] +// COARSEGRAIN2: UNROLLING loop [[NAME]] + +// COARSEGRAIN3: Loop Unroll: F{{\[}}[[FUNCNAME:main]]{{\]}} Loop % +// COARSEGRAIN3: UNROLLING loop % +// COARSEGRAIN3: Loop Unroll: F{{\[}}[[FUNCNAME]]{{\]}} Loop % +// COARSEGRAIN3: UNROLLING loop % +// COARSEGRAIN3: Loop Unroll: F{{\[}}[[FUNCNAME]]{{\]}} Loop % +// COARSEGRAIN3: UNROLLING loop % +// COARSEGRAIN3-NOT: UNROLLING loop diff --git a/clang/test/Autotuning/LTO/apply_config_fine_grain.cpp b/clang/test/Autotuning/LTO/apply_config_fine_grain.cpp new file mode 100644 index 0000000000000000000000000000000000000000..7f0894cefeaced27733ca972b6b35121f33d6319 --- /dev/null +++ b/clang/test/Autotuning/LTO/apply_config_fine_grain.cpp @@ -0,0 +1,58 @@ +// REQUIRES: asserts + +// RUN: rm -rf %t.data_dir +// RUN: export AUTOTUNE_DATADIR=%t.data_dir +// RUN: mkdir $AUTOTUNE_DATADIR +// RUN: cp %S/Inputs/datadir/fine_grain_output.yaml %t.data_dir/config.yaml + +// RUN: %clang -O3 -flto=full -fuse-ld=lld -fautotune %S/Inputs/src/output.c \ +// RUN: -c -mllvm -debug-only=loop-unroll -mllvm -debug-only=autotuning \ +// RUN: -mllvm -auto-tuning-code-region-matching-hash=false -o %t.output.o \ +// RUN: 2>&1 -mllvm -print-before-all | \ +// RUN: FileCheck %s --check-prefix=FINEGRAIN1 + +// RUN: %clang -O3 -flto=full -fuse-ld=lld -fautotune %S/Inputs/src/input.c \ +// RUN: -c -mllvm -debug-only=loop-unroll -mllvm -debug-only=autotuning \ +// RUN: -mllvm -auto-tuning-code-region-matching-hash=false -o %t.input.o \ +// RUN: 2>&1 | FileCheck %s --check-prefix=FINEGRAIN2 + +// RUN: %clang 
-O3 -flto=full -fuse-ld=lld -fautotune %S/Inputs/src/test.c \ +// RUN: -c -mllvm -debug-only=loop-unroll -mllvm -debug-only=autotuning \ +// RUN: -mllvm -auto-tuning-code-region-matching-hash=false -o %t.test.o \ +// RUN: 2>&1 | FileCheck %s --check-prefix=FINEGRAIN2 + +// RUN: cp %S/Inputs/datadir/fine_grain_a.out.yaml %t.data_dir/config.yaml +// RUN: %clang -O3 -flto=full -fuse-ld=lld -fautotune %t.output.o %t.input.o \ +// RUN: %t.test.o -Wl,-mllvm,-auto-tuning-code-region-matching-hash=false \ +// RUN: -Wl,-mllvm,-debug-only=inline -Wl,-mllvm,-debug-only=loop-unroll \ +// RUN: -Wl,-mllvm,-debug-only=autotuning 2>&1 | \ +// RUN: FileCheck %s --check-prefix=FINEGRAIN3 + +// FINEGRAIN1: IR Dump Before LoopUnrollPass on [[FUNCNAME1:output_data]] +// FINEGRAIN1: Loop Unroll: F{{\[}}[[FUNCNAME1]]{{\]}} Loop %[[NAME:for.body]] +// FINEGRAIN1: UnrollCount is set for the CodeRegion +// FINEGRAIN1-NEXT: Name: [[NAME]] +// FINEGRAIN1-NEXT: FuncName: [[FUNCNAME1]] +// FINEGRAIN1: UNROLLING loop %[[NAME]] + +// FINEGRAIN2-NOT: UnrollCount is set for the CodeRegion + +// FINEGRAIN3: ForceInline is set for the CodeRegion +// FINEGRAIN3-NEXT: Name: [[CALLEE:input_data]] +// FINEGRAIN3: Inlining (cost=always): Force inlined by auto-tuning +// FINEGRAIN3-SAME: @[[CALLEE]] +// FINEGRAIN3: Loop Unroll: F{{\[}}[[FUNCNAME2:main]]{{\]}} Loop % +// FINEGRAIN3: UnrollCount is set for the CodeRegion +// FINEGRAIN3-NEXT: Name: label % +// FINEGRAIN3-NEXT: FuncName: [[FUNCNAME2]] +// FINEGRAIN3: UNROLLING loop % +// FINEGRAIN3: Loop Unroll: F{{\[}}[[FUNCNAME2]]{{\]}} Loop % +// FINEGRAIN3: UnrollCount is set for the CodeRegion +// FINEGRAIN3-NEXT: Name: label % +// FINEGRAIN3-NEXT: FuncName: [[FUNCNAME2]] +// FINEGRAIN3: UNROLLING loop % +// FINEGRAIN3: Loop Unroll: F{{\[}}[[FUNCNAME2]]{{\]}} Loop % +// FINEGRAIN3: UnrollCount is set for the CodeRegion +// FINEGRAIN3-NEXT: Name: label % +// FINEGRAIN3-NEXT: FuncName: [[FUNCNAME2]] +// FINEGRAIN3: UNROLLING loop % diff --git 
a/clang/test/Autotuning/LTO/generate_opportunity.cpp b/clang/test/Autotuning/LTO/generate_opportunity.cpp new file mode 100644 index 0000000000000000000000000000000000000000..efcc4dc7c2f2e50d5322cd1218f364a2cb2dbb65 --- /dev/null +++ b/clang/test/Autotuning/LTO/generate_opportunity.cpp @@ -0,0 +1,56 @@ +// RUN: rm -rf %t.data_dir +// RUN: export AUTOTUNE_DATADIR=%t.data_dir + +// RUN: %clang -O3 -c -flto=full -fuse-ld=lld -fautotune-generate -o %t.output.o \ +// RUN: %S/Inputs/src/output.c +// RUN: FileCheck %s --input-file %t.data_dir/opp/output.c.yaml \ +// RUN: -check-prefix=FINEGRAIN + +// RUN: %clang -O3 -c -flto=full -fuse-ld=lld -fautotune-generate -o %t.input.o \ +// RUN: %S/Inputs/src/input.c +// RUN: FileCheck %s --input-file %t.data_dir/opp/input.c.yaml \ +// RUN: -check-prefix=FINEGRAIN + +// RUN: %clang -O3 -c -flto=full -fuse-ld=lld -fautotune-generate -o %t.test.o \ +// RUN: %S/Inputs/src/test.c +// RUN: FileCheck %s --input-file %t.data_dir/opp/test.c.yaml \ +// RUN: -check-prefix=FINEGRAIN + +// RUN: %clang -O3 -flto=full -fuse-ld=lld -fautotune-generate %t.output.o \ +// RUN: %t.test.o %t.input.o +// RUN: FileCheck %s --input-file %t.data_dir/opp/a.out.yaml \ +// RUN: -check-prefix=FINEGRAIN-LTO + +// RUN: rm -rf %t.data_dir +// RUN: export AUTOTUNE_DATADIR=%t.data_dir + +// RUN: %clang -O3 -c -flto=full -fuse-ld=lld -fautotune-generate=LLVMParam \ +// RUN: %S/Inputs/src/output.c -o %t.output.o +// RUN: FileCheck %s --input-file %t.data_dir/opp/output.c.yaml \ +// RUN: -check-prefix=COARSEGRAIN + +// RUN: %clang -O3 -c -flto=full -fuse-ld=lld -fautotune-generate=LLVMParam \ +// RUN: %S/Inputs/src/input.c -o %t.input.o +// RUN: FileCheck %s --input-file %t.data_dir/opp/input.c.yaml \ +// RUN: -check-prefix=COARSEGRAIN + +// RUN: %clang -O3 -c -flto=full -fuse-ld=lld -fautotune-generate=LLVMParam \ +// RUN: %S/Inputs/src/test.c -o %t.test.o +// RUN: FileCheck %s --input-file %t.data_dir/opp/test.c.yaml \ +// RUN: -check-prefix=COARSEGRAIN + +// RUN: 
%clang -O3 -flto=full -fuse-ld=lld -fautotune-generate=LLVMParam \ +// RUN: %t.test.o %t.input.o %t.output.o +// RUN: FileCheck %s --input-file %t.data_dir/opp/a.out.yaml \ +// RUN: -check-prefix=COARSEGRAIN + +// FINEGRAIN: --- !AutoTuning +// FINEGRAIN: CodeRegionType: {{callsite|loop}} + +// FINEGRAIN-LTO: --- !AutoTuning +// FINEGRAIN-LTO: CodeRegionType: callsite +// FINEGRAIN-LTO: --- !AutoTuning +// FINEGRAIN-LTO: CodeRegionType: loop + +// COARSEGRAIN: --- !AutoTuning +// COARSEGRAIN: CodeRegionType: llvm-param diff --git a/clang/test/Autotuning/PhaseOrdering/Inputs/template.yaml b/clang/test/Autotuning/PhaseOrdering/Inputs/template.yaml new file mode 100644 index 0000000000000000000000000000000000000000..065d3cb85b72f89d2a66b40e55387975aaac3fcc --- /dev/null +++ b/clang/test/Autotuning/PhaseOrdering/Inputs/template.yaml @@ -0,0 +1,8 @@ +--- !AutoTuning +Pass: all +Name: [filename] +Function: none +CodeRegionType: other +Args: + - OptPass: [pass] +... diff --git a/clang/test/Autotuning/PhaseOrdering/pass-order.cpp b/clang/test/Autotuning/PhaseOrdering/pass-order.cpp new file mode 100644 index 0000000000000000000000000000000000000000..ce42527909f6b737ec24e1d8aad44d2837ab76cc --- /dev/null +++ b/clang/test/Autotuning/PhaseOrdering/pass-order.cpp @@ -0,0 +1,48 @@ +// Disable auto-tuning +// RUN: %clang %s -S -mllvm -debug-pass=Arguments 2>&1 >/dev/null | \ +// RUN: FileCheck %s -check-prefix=DISABLE + +// One Pass +// RUN: rm %t.onepass-debug-pass.yaml -rf +// RUN: sed 's#\[filename\]#%s#g; s#\[pass\]#\[loop-extract\]#g' \ +// RUN: %S/Inputs/template.yaml > %t.onepass-debug-pass.yaml +// RUN: %clang %s -S -mllvm -auto-tuning-input=%t.onepass-debug-pass.yaml \ +// RUN: -mllvm -print-after-all 2>&1 >/dev/null | \ +// RUN: FileCheck %s -check-prefix=ONEPASS + +// Two passes (A->B): +// RUN: rm %t.twopass-ab-debug-pass.yaml -rf +// RUN: sed 's#\[filename\]#%s#g; s#\[pass\]#\[loop-extract,strip\]#g' \ +// RUN: %S/Inputs/template.yaml > %t.twopass-ab-debug-pass.yaml 
+// RUN: %clang %s -S -mllvm -auto-tuning-input=%t.twopass-ab-debug-pass.yaml \ +// RUN: -mllvm -print-after-all 2>&1 >/dev/null | \ +// RUN: FileCheck %s -check-prefix=TWOPASS_AB + +// Two passes (B->A): +// RUN: rm %t.twopass-ba-debug-pass.yaml -rf +// RUN: sed 's#\[filename\]#%s#g; s#\[pass\]#\[strip,loop-extract\]#g' \ +// RUN: %S/Inputs/template.yaml > %t.twopass-ba-debug-pass.yaml +// RUN: %clang %s -S -mllvm -auto-tuning-input=%t.twopass-ba-debug-pass.yaml \ +// RUN: -mllvm -print-after-all 2>&1 >/dev/null | \ +// RUN: FileCheck %s -check-prefix=TWOPASS_BA +// UNSUPPORTED: windows + +// a simple cpp file +int main() { + int i = 8; + for (; i < 20;) { + int a = i - 5; + i = i + 2; + } +} + +// DISABLE: Pass Arguments: +// DISABLE-NOT: -loop-extract + +// ONEPASS: *** IR Dump After LoopExtractorPass + +// TWOPASS_AB: *** IR Dump After LoopExtractorPass +// TWOPASS_AB: *** IR Dump After StripSymbolsPass + +// TWOPASS_BA: *** IR Dump After StripSymbolsPass +// TWOPASS_BA: *** IR Dump After LoopExtractorPass diff --git a/clang/test/Autotuning/lit.local.cfg b/clang/test/Autotuning/lit.local.cfg new file mode 100644 index 0000000000000000000000000000000000000000..13b4927257ab18e426fe9142bd4b63881a721146 --- /dev/null +++ b/clang/test/Autotuning/lit.local.cfg @@ -0,0 +1,2 @@ +if not config.enable_enable_autotuner: + config.unsupported = True diff --git a/clang/test/lit.site.cfg.py.in b/clang/test/lit.site.cfg.py.in index bc278740e0d04b4f26a13183591813e2c1abd579..3cfea9d681d238a7ba36adc9e9427a1edf06e3ed 100644 --- a/clang/test/lit.site.cfg.py.in +++ b/clang/test/lit.site.cfg.py.in @@ -42,6 +42,7 @@ config.standalone_build = @CLANG_BUILT_STANDALONE@ config.ppc_linux_default_ieeelongdouble = @PPC_LINUX_DEFAULT_IEEELONGDOUBLE@ config.have_llvm_driver = @LLVM_TOOL_LLVM_DRIVER_BUILD@ config.use_classic_flang = @LLVM_ENABLE_CLASSIC_FLANG@ +config.enable_enable_autotuner = @LLVM_ENABLE_AUTOTUNER@ import lit.llvm lit.llvm.initialize(lit_config, config) diff --git 
a/lld/ELF/Driver.cpp b/lld/ELF/Driver.cpp index c2059c70e15a3d1326cf8d7d9d6691ae746e2616..ffd0842b907812832b77e260ae70b3186ed1f3a1 100644 --- a/lld/ELF/Driver.cpp +++ b/lld/ELF/Driver.cpp @@ -341,6 +341,18 @@ void LinkerDriver::addLibrary(StringRef name) { // Technically this can be delayed until we read bitcode files, but // we don't bother to do lazily because the initialization is fast. static void initLLVM() { +#if defined(ENABLE_AUTOTUNER) + // AUTO-TUNING - initialization (StringRef::str() is length-aware/null-safe) + if (Error E = autotuning::Engine.init(config->outputFile.str())) { + error(toString(std::move(E))); + return; + } + if (autotuning::Engine.isEnabled() && autotuning::Engine.isParseInput() && + (autotuning::Engine.LLVMParams.size() || + autotuning::Engine.ProgramParams.size())) + llvm::cl::ParseAutoTunerOptions(autotuning::Engine.LLVMParams, + autotuning::Engine.ProgramParams); +#endif InitializeAllTargets(); InitializeAllTargetMCs(); InitializeAllAsmPrinters(); @@ -2814,6 +2826,12 @@ void LinkerDriver::link(opt::InputArgList &args) { reportBackrefs(); writeArchiveStats(); writeWhyExtract(); +#if defined(ENABLE_AUTOTUNER) + // AUTO-TUNING - finalization + if (Error E = autotuning::Engine.finalize()) { + error(toString(std::move(E))); + } +#endif if (errorCount()) return; diff --git a/llvm/cmake/modules/CrossCompile.cmake b/llvm/cmake/modules/CrossCompile.cmake index 6af47b51d4c6066bc64283cdd74a899a78a6c6d1..1a9fb4b2dddc95411c15d21d6b32dd60f92fd8e5 100644 --- a/llvm/cmake/modules/CrossCompile.cmake +++ b/llvm/cmake/modules/CrossCompile.cmake @@ -82,6 +82,7 @@ function(llvm_create_cross_target project_name target_name toolchain buildtype) -DLLVM_ENABLE_PROJECTS="${llvm_enable_projects_arg}" -DLLVM_EXTERNAL_PROJECTS="${llvm_external_projects_arg}" -DLLVM_ENABLE_RUNTIMES="${llvm_enable_runtimes_arg}" + -DLLVM_ENABLE_AUTOTUNER="${LLVM_ENABLE_AUTOTUNER}" ${external_project_source_dirs} -DLLVM_TEMPORARILY_ALLOW_OLD_TOOLCHAIN="${LLVM_TEMPORARILY_ALLOW_OLD_TOOLCHAIN}" -DLLVM_INCLUDE_BENCHMARKS=OFF
diff --git a/llvm/cmake/modules/HandleLLVMOptions.cmake b/llvm/cmake/modules/HandleLLVMOptions.cmake index 492ea25b179bc1bf919c450f200796b799e3ee33..fd4d92e57c699f117d748a59be000b6250050545 100644 --- a/llvm/cmake/modules/HandleLLVMOptions.cmake +++ b/llvm/cmake/modules/HandleLLVMOptions.cmake @@ -97,6 +97,14 @@ else() set(LLVM_ENABLE_CLASSIC_FLANG 0) endif() +option(LLVM_ENABLE_AUTOTUNER "Enable BiSheng Auto-Tuning features" OFF) +if (LLVM_ENABLE_AUTOTUNER) + set(LLVM_ENABLE_AUTOTUNER 1) + add_definitions( -DENABLE_AUTOTUNER ) +else() + set(LLVM_ENABLE_AUTOTUNER 0) +endif() + if(LLVM_ENABLE_EXPENSIVE_CHECKS) add_compile_definitions(EXPENSIVE_CHECKS) diff --git a/llvm/include/llvm/Analysis/AutotuningDump.h b/llvm/include/llvm/Analysis/AutotuningDump.h new file mode 100644 index 0000000000000000000000000000000000000000..fb973f05323e534527490b0db0e90c3e204ea98c --- /dev/null +++ b/llvm/include/llvm/Analysis/AutotuningDump.h @@ -0,0 +1,75 @@ +#if defined(ENABLE_AUTOTUNER) +// ===-- AutotuningDump.h - Auto-Tuning-----------------------------------===// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. 
+// +// ===--------------------------------------------------------------------===// +// +// This file contains pass collecting IR of tuned regions and storing them into +// predetermined locations, to be used later by autotuning ML guidance +// +// ===--------------------------------------------------------------------===// + +#include "llvm/Analysis/LoopInfo.h" +#include "llvm/Analysis/LoopPass.h" +#include "llvm/IR/PassManager.h" +#include "llvm/Transforms/Scalar/LoopPassManager.h" +#include + +namespace llvm { +class AutotuningDump { +public: + AutotuningDump(bool IncrementalCompilation = false); + bool run(Module &F, function_ref GetLI); + +private: + std::string AutoTuneDirPath; + std::unique_ptr createFile(const Twine &File); + int getConfigNumber(); + void dumpToStream(llvm::raw_ostream &os, const Loop &L) const; + void dumpToStream(llvm::raw_ostream &os, const Function &F) const; + void dumpFunctions(llvm::Module &M); + void dumpLoops(llvm::Module &M, function_ref GetLI); + void dumpModule(llvm::Module &M); + std::string getDirectoryName(const std::string File) const; + std::string getFileName(std::string FilePath); + + bool IsIncrementalCompilation; +}; + +class AutotuningDumpLegacy : public ModulePass { +public: + static char ID; + AutotuningDumpLegacy(bool IncrementalCompilation = false); + StringRef getPassName() const override; + bool runOnModule(Module &M) override; + void getAnalysisUsage(AnalysisUsage &AU) const override; + +private: + bool IsIncrementalCompilation; +}; + +class AutotuningDumpAnalysis + : public AnalysisInfoMixin { + friend AnalysisInfoMixin; + static AnalysisKey Key; + +public: + AutotuningDumpAnalysis(bool IncrementalCompilation = false) { + IsIncrementalCompilation = IncrementalCompilation; + } + + // This pass only prints IRs of selected function or loops without doing any + // real analyses, thus the return value is meaningless.
To avoid leaking data + // or memory, we typedef Result to Optional to avoid having to return an + // AutotuningDump object. + using Result = std::optional; + Result run(Module &M, ModuleAnalysisManager &AM); + +private: + bool IsIncrementalCompilation; +}; +} // namespace llvm +#endif \ No newline at end of file diff --git a/llvm/include/llvm/Analysis/LoopInfo.h b/llvm/include/llvm/Analysis/LoopInfo.h index 3434630c27cfe74194e0116951def4f7bfb8c938..9be3e056cf76061120f746c0b88a8d616b7cf4f7 100644 --- a/llvm/include/llvm/Analysis/LoopInfo.h +++ b/llvm/include/llvm/Analysis/LoopInfo.h @@ -26,6 +26,9 @@ #include #include #include +#if defined(ENABLE_AUTOTUNER) +#include "llvm/AutoTuner/AutoTuning.h" +#endif namespace llvm { @@ -44,7 +47,12 @@ extern template class LoopBase; /// Represents a single loop in the control flow graph. Note that not all SCCs /// in the CFG are necessarily loops. +#if defined(ENABLE_AUTOTUNER) +class LLVM_EXTERNAL_VISIBILITY Loop : public LoopBase, + public autotuning::Container { +#else class LLVM_EXTERNAL_VISIBILITY Loop : public LoopBase { +#endif public: /// A range representing the start and end location of a loop. class LocRange { @@ -395,6 +403,11 @@ public: return ""; } +#if defined(ENABLE_AUTOTUNER) + void initCodeRegion() override; + uint64_t computeStructuralHash() override; +#endif + private: Loop() = default; diff --git a/llvm/include/llvm/Analysis/Passes.h b/llvm/include/llvm/Analysis/Passes.h index ac1bc3549910c8c8465114685462fcaf4637dfc5..65f566cc75deaea6db0b6734c7584b928390e900 100644 --- a/llvm/include/llvm/Analysis/Passes.h +++ b/llvm/include/llvm/Analysis/Passes.h @@ -58,6 +58,16 @@ namespace llvm { // in a function and builds the region hierarchy. 
// FunctionPass *createRegionInfoPass(); + +#if defined(ENABLE_AUTOTUNER) + //===--------------------------------------------------------------------===// + // + // createAutotuningDumpPass - This pass collects IR of tuned regions + // and stores them into predetermined locations + // for the purpose of autotuning ML guidance. + // + ModulePass *createAutotuningDumpPass(); +#endif } #endif diff --git a/llvm/include/llvm/AutoTuner/AutoTuning.h b/llvm/include/llvm/AutoTuner/AutoTuning.h new file mode 100644 index 0000000000000000000000000000000000000000..0f1f276306ecc4647400f5c12a1e3d3874ca0e32 --- /dev/null +++ b/llvm/include/llvm/AutoTuner/AutoTuning.h @@ -0,0 +1,486 @@ +#if defined(ENABLE_AUTOTUNER) +//===-- AutoTuning.h - Auto-Tuning-----------------------------------------===// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file defines Auto Tuning related functions, models and interfaces. +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_AUTOTUNER_AUTOTUNING_H_ +#define LLVM_AUTOTUNER_AUTOTUNING_H_ + +#include "llvm/ADT/DenseMapInfo.h" +#include "llvm/ADT/Hashing.h" +#include "llvm/ADT/SetVector.h" +#include "llvm/ADT/SmallVector.h" +#include "llvm/IR/DebugInfoMetadata.h" +#include "llvm/IR/DebugLoc.h" +#include "llvm/Support/Casting.h" +#include +#include +#include +#include +#include + +// Options for AutoTuner incremental compilation. +enum AutoTuningCompileOpt { + Inactive, // Disabled incremental compilation. + CoarseGrain, // For tuning LLVMParam. + FineGrain, // For tuning default code regions (Loop, CallSite, Function). + Basic // Same as CoarseGrain but can be applied for any code region. + // Can be used with ImpactRanker.
+}; + +namespace autotuning { +// Constant definitions for AutoTuner incremental compilation. +const std::string CompileOptionStart = "start"; +const std::string CompileOptionEnd = "end"; +const std::string CompileOptionUnknow = "unknown"; +const std::string CompileOptionUnroll = "loop-unroll"; +const std::string CompileOptionVectorize = "loop-vectorize"; +const std::string CompileOptionInline = "inline"; + +class ParameterBase { +public: + virtual ~ParameterBase() = default; + enum ParameterKind { + PK_PARAMETER, + }; + ParameterKind getKind() const { return Kind; } + + explicit ParameterBase(ParameterKind K) : Kind(K) {} + +private: + const ParameterKind Kind; +}; + +template class Parameter : public ParameterBase { +public: + Parameter(const T &RHS) : ParameterBase(PK_PARAMETER), Value(RHS) {} + const T &getValue() const { return Value; } + void setValue(const T &RHS) { Value = RHS; } + + static bool classof(const ParameterBase *P) { + return P->getKind() == PK_PARAMETER; + } + +private: + T Value; +}; + +/// This class manages parameters of one codeRegion. +class ParameterManager { + +public: + // add a param into this ParameterManager + template + void add(const std::string &ParamName, const T ParamValue) { + std::shared_ptr Param = + std::make_shared>(ParamValue); + this->Parameters[ParamName] = Param; + } + + // Look up the value of a parameter by name in this ParameterManager. + // The found value will be assigned to the reference variable "Value". + // Return true if the parameter exists in this ParameterManager, + // and false otherwise.
+ template + bool findByName(const std::string &ParamName, T &Value) const { + auto Iterator = Parameters.find(ParamName); + if (Iterator == Parameters.end()) { + return false; + } + + auto ParamPtr = llvm::dyn_cast>(Iterator->second.get()); + if (ParamPtr != nullptr) { + Value = ParamPtr->getValue(); + return true; + } else { + return false; + } + } + +private: + std::unordered_map> Parameters; +}; + +/// The debug location used to track a CodeRegion back to the source file. +struct SourceLocation { + /// The source file corresponding to this CodeRegion. + std::string SourceFilePath; + unsigned SourceLine = 0; + unsigned SourceColumn = 0; + + bool operator==(const SourceLocation &CR) const { + return (this->SourceFilePath == CR.SourceFilePath) && + (this->SourceLine == CR.SourceLine) && + (this->SourceColumn == CR.SourceColumn); + }; + + explicit operator bool() const { + return !(SourceFilePath.empty() && SourceLine == 0 && SourceColumn == 0); + } +}; + +enum CodeRegionType { + CallSite, // Code region for function inlining. + Function, // Used in AutoTuningDump pass for IR writing. + LLVMParam, // Compilation flags. Tuned individually for each module. + Loop, // Code region for loops. + MachineBasicBlock, // Instruction scheduling code region. + Other, // Pass ordering code region. + ProgramParam, // Compilation flags. Tuned collectively for program. + Switch, // Tuning MinJumpTableEntries parameter for switch inst. + Empty, // Empty CodeRegion. + Invalid // Invalid CodeRegion. +}; + +enum HotnessType { + Unknown, + Cold, + Hot, +}; + +/// DynamicOptions represent a map: Arg -> DynamicConfigs. +/// Where Arg is a tuning parameter on the associated CodeRegion. +/// And DynamicConfigs is the possible tuning values associated with Arg. +typedef std::map> DynamicOptions; + +/// This class represents a region in source code including +/// its name, function name, type, debug location, and associated pass name. 
+class CodeRegion { + +public: + // Default constructor + CodeRegion(const CodeRegionType Type = CodeRegionType::Other); + ~CodeRegion() = default; + // Concrete constructors + CodeRegion(const std::string &Name, const std::string &FuncName, + const CodeRegionType &Type, const llvm::DebugLoc &DL, + const DynamicOptions DO = {}); + CodeRegion(const std::string &Name, const std::string &FuncName, + const CodeRegionType &Type, + const SourceLocation &Location = SourceLocation(), + const DynamicOptions DO = {}); + CodeRegion(const std::string &Name, const std::string &FuncName, + const std::string &PassName, const CodeRegionType &Type, + const SourceLocation &Location = SourceLocation(), + const unsigned int Invocation = 0); + + bool operator==(const CodeRegion &CR) const; + inline bool operator!=(const CodeRegion &CR) const { return !(*this == CR); }; + + explicit operator bool() const { + return !(Name.empty() && FuncName.empty() && PassName.empty()); + } + + static std::string getTypeAsString(CodeRegionType CRType); + static std::string getHotnessAsString(HotnessType Hotness); + const std::string &getName() const { return Name; } + const std::string &getFuncName() const { return FuncName; } + const CodeRegionType &getType() const { return Type; } + const std::string &getFileName() const { return Location.SourceFilePath; } + const std::string &getTypeAsString() const { return StringType; } + const SourceLocation &getSourceLoc() const { return Location; } + const std::string &getPassName() const { return PassName; } + unsigned getSize() const { return Size; }; + void setPassName(const std::string &NewPassName); + void setSize(unsigned Size) { this->Size = Size; }; + void setHotness(HotnessType NewHotness) const { this->Hotness = NewHotness; } + HotnessType getHotness() const { return this->Hotness; } + std::string getHotnessAsString() const { return getHotnessAsString(Hotness); } + bool isCold() const { return this->Hotness == Cold; } + bool isHot() const { return 
this->Hotness == Hot; } + std::uint64_t getHash() const { return this->Hash; } + void setHash(std::uint64_t Hash) { this->Hash = Hash; } + DynamicOptions getAutoTunerOptions() const { return this->AutoTunerOptions; } + void setInvocation(unsigned int Invocation) { this->Invocation = Invocation; } + unsigned int getInvocation() const { return this->Invocation; } + + /// Add dynamic config options with Code Region for AutoTuner to tune instead + /// of using static config options. + void addAutoTunerOptions(const std::string ParamName, + std::vector Options) const { + this->AutoTunerOptions.insert( + std::pair>(ParamName, Options)); + } + static CodeRegion getInvalidInstance(); + static CodeRegion getEmptyInstance(); + void setBaselineConfig(std::map Value) const { + this->BaselineConfig = Value; + }; + std::map getBaselineConfig() const { + return this->BaselineConfig; + } + +private: + /// Name of the code region. + /// For most of cases it's set to the name of a header basic block. + std::string Name; + /// Function name of this code region if any. + std::string FuncName; + /// Name of the pass which this code region is associated. + std::string PassName; + /// Type of this code region. Options are other, function, loop, + /// and machine basic block. + CodeRegionType Type; + /// Source Location. + SourceLocation Location; + std::string StringType; + /// Structural hash for the CodeRegion. + std::uint64_t Hash = 0; + /// Configs values passed to AutoTuner for dynamic setting of search space + /// for code regions. + mutable DynamicOptions AutoTunerOptions; + /// Configuration values passed to AutoTuner for generating the same binary + /// as the baseline. + mutable std::map BaselineConfig; + + /// Record the order of invocation of an optimization pass during the whole + /// compilation pipeline. It is used to differentiate multiple invocations of + /// a same optimization pass. 
+ /// Currently, Loop Unroll pass is invoked twice during the compilation + /// pipeline. 'Invocation' helps to relate a code region with the invocation + /// of Loop Unroll pass where the code region is generated. + mutable unsigned int Invocation; + + /// Size of this code region. Usually it refers to the number of instructions + /// but could be different based on implementations. + unsigned Size = 0; + mutable HotnessType Hotness = Unknown; + + /// A boolean flag to record if a CR is initialized or not. + /// It should only be set to true by initContainer(). + /// We only add initialized CR to TuningOpps. + bool Initialized = false; + + friend class AutoTuningEngine; +}; + +/// This class is an interface for classes representing code regions in LLVM +/// (eg. Loop, Function and MachineBasicBlock) to inherit +/// so that auto-tuning can be enabled on them. +/// A Container must contain a CodeRegion. +class Container { + +public: + Container() {} + virtual ~Container(){}; + + /// Abstract method for derived classes to overwrite + virtual void initCodeRegion() = 0; + virtual uint64_t computeStructuralHash() = 0; + + /// Get the Container's CodeRegion. + const CodeRegion &getCodeRegion() const; + /// Set the Container's CodeRegion. + void setCodeRegion(const CodeRegion &NewCR); + /// This method is to look up the value of a parameter that corresponds to an + /// Container. The parameter being looked up is stored in a ParameterManager. + template + bool lookUpParams(const std::string &ParamsName, T &Value) const; + + /// Check if the code region is being tuned by config file. + bool requiresIRDump(bool IsFunctionIR = false) const; + +private: + CodeRegion CR; + friend class AutoTuningEngine; +}; +} // end namespace autotuning + +namespace std { +template <> +// Implement hash for CodeRegion data type in std namespace. Only using common +// attributes (with and without using 'OmitAutotuningMetadata' flag) of +// CodeRegion. 
Remaining attributes are compared in overloaded == function. +struct hash { + std::size_t operator()(const autotuning::CodeRegion &CR) const { + return llvm::hash_combine(CR.getPassName(), CR.getType()); + } +}; +} // namespace std + +namespace llvm { +// Forward declaration. +class CallBase; + +typedef autotuning::CodeRegion CodeRegion; +template <> struct DenseMapInfo { + static bool isEqual(const CodeRegion &LHS, const CodeRegion &RHS) { + return LHS == RHS; + } + static inline CodeRegion getEmptyKey() { + return autotuning::CodeRegion::getEmptyInstance(); + } + static inline CodeRegion getTombstoneKey() { + return autotuning::CodeRegion::getInvalidInstance(); + } + // Implement hash for CodeRegion data type in llvm namespace. Only using + // common attributes (with and without using 'OmitAutotuningMetadata' flag) + // of CodeRegion. Remaining attributes are compared in overloaded == + // function. + static unsigned getHashValue(const CodeRegion &CR) { + return llvm::hash_combine(CR.getPassName(), CR.getType()); + } +}; +} // namespace llvm + +namespace autotuning { +using namespace llvm; +typedef std::unordered_map LookUpTable; +typedef llvm::SetVector CodeRegions; + +/// Structure to store information of CallSite code regions which is used to +/// get a different SourceLocation for multiple callsites (same callee) in a +/// function when these callsites have same SourceLocation due to inlining. +struct CallSiteLocation { + llvm::CallBase *CB; + llvm::Function *Caller; + llvm::Function *Callee; + SourceLocation SrcLoc; +}; + +class AutoTuningEngine { +public: + AutoTuningEngine() { Enabled = false; } + ~AutoTuningEngine() {} + + /// Initialize the Container for auto-tuning. + void initContainer(Container *Container, const std::string &PassName, + const StringRef FuncName = "", bool AddOpportunity = true, + unsigned int Invocation = 0); + + /// Initialize auto-tuning. This method should only be called in the main + /// function.
+ /// \return Error::success() on success or the related Error otherwise. + llvm::Error init(const std::string &ModuleID); + + /// Finalize auto-tuning. This method should only be called in the main + /// function. + /// \return Error::success() on success or the related Error otherwise. + llvm::Error finalize(); + + /// Return the number of tuning configuration used for this compilation. + llvm::Expected getConfigNumber(); + + void enable() { Enabled = true; } + void disable() { Enabled = false; } + bool isEnabled() const { return Enabled; } + bool isMLEnabled() const { return MLEnabled; } + bool isDumpEnabled() const { return DumpEnabled; } + bool isGenerateOutput() const { return GenerateOutput; } + bool isParseInput() const { return ParseInput; } + bool isTuningAllowedForType(CodeRegionType CRType) const { + return (CodeRegionFilterTypes.count(CRType) > 0); + } + bool isThinLTOTuning() const; + + /// Convert a pass-name to CodeRegionType. + CodeRegionType convertPassToType(std::string Pass); + + /// First sets BaselineConfig value for the CR then + /// add a tuning opportunity into the TuningOpps list. + void addOpportunity(const CodeRegion &OppCR, + std::map BaselineConfig = {}); + bool hasOpportunities() const { return !TuningOpps.empty(); } + + bool shouldRunOptPass(std::string FileName, std::string Pass); + + /// Insert all of the callsites of a function in CallSiteLocs vector. + void insertCallSiteLoc(CallSiteLocation Loc); + + /// Update CallSiteLocs vector with new callsites (if any) which get available + /// due to inlining. + void updateCallSiteLocs(llvm::CallBase *CB, llvm::CallBase *Ptr, + llvm::Function *F, unsigned int Line); + + /// Clean up the CallSiteLocs vector by keeping the callsite if there are + /// multiple calls to same callee. This cleaning will be performed before + /// inlining any callsite. + void cleanCallSiteLoc(); + + /// clear the CallSiteLocs vector.
+ void clearCallSiteLocs(); + + /// Return the SourceLocation::SourceLine (if available). + std::optional getCallSiteLoc(llvm::CallBase *CB); + + template + bool lookUpGlobalParams(const std::string &ParamsName, T &Value) const; + /// A map storing llvm parameters. + std::unordered_map LLVMParams; + /// A map storing program parameters. + std::unordered_map ProgramParams; + +private: + std::string ModuleID; + /// This boolean indicates if the auto-tuning mode is enabled. + /// It will be set to true if the any of the following command line options + /// (auto-tuning-input, auto-tuning-result and auto-tuning-opp) is specified. + bool Enabled; + /// This boolean indicates if the ML guidance feature is enabled in + /// Autotuner. It will be set to true if -fautotune-rank is specified. + bool MLEnabled; + /// This boolean indicates if the IR dumping is enabled or not. IR dumping + /// is enabled for ML guidance feature. It can also be enabled with command + /// line compiler flag 'enable-autotuning-dump'. + bool DumpEnabled = false; + /// This boolean indicates if compiler is parsing/using 'config.yaml' file + /// generated by AutoTuner and use the configuration values instead of + /// determining with compiler heuristic. + bool ParseInput; + /// This boolean indicates if compiler is creating/generating opportunity + /// file(s) which will be consumed by AutoTuner to create the search space. + bool GenerateOutput; + /// A map of filename and set of optimization passes; an optimization pass + /// will be added to this set if a CodeRegion belongs to the optimization + /// pass. + std::unordered_map> OppPassList; + + /// Vector to store all of the duplicate calls in a function and the calls + /// which get available due to inlining. + SmallVector CallSiteLocs; + + /// A set to store the code region types that will be tuned in current + /// autotuning flow. 
This will be populated with code region types based on + /// 'auto-tuning-type-filter' for -fautotune-generate and the types will be + /// extracted from config.yaml in case of -fautotune. + /// This set is used to apply type-based filtering prior to creating/ + /// initializing a code region. + std::unordered_set CodeRegionFilterTypes; + + // A statically initialized map used to convert 'pass-name' to + // 'CodeRegionType'. + std::unordered_map PTTMap; + + /// A map of CodeRegion and ParameterManager to keep track of all the + /// parameters of code regions loaded from input config file. + LookUpTable ParamTable; + /// A list of CodeRegions as tuning opportunities + CodeRegions TuningOpps; + /// A ParameterManager for global parameters. + ParameterManager GlobalParams; + + /// Apply filters for CodeRegions. + void applyOppFilters(CodeRegions &CRs); + + /// Apply function name filter for CodeRegions. + bool applyFunctionFilter(std::string FuncName); + + friend class Container; + friend class CodeRegion; + friend class AutoTuningRemarkManager; +}; + +extern class AutoTuningEngine Engine; // AutoTuning Engine + +} // end namespace autotuning + +#endif /* LLVM_AUTOTUNER_AUTOTUNING_H_ */ +#endif diff --git a/llvm/include/llvm/AutoTuner/AutoTuningRemarkManager.h b/llvm/include/llvm/AutoTuner/AutoTuningRemarkManager.h new file mode 100644 index 0000000000000000000000000000000000000000..153a2c6246adbe2741b9463aec2e544f11f49609 --- /dev/null +++ b/llvm/include/llvm/AutoTuner/AutoTuningRemarkManager.h @@ -0,0 +1,43 @@ +#if defined(ENABLE_AUTOTUNER) +//===- llvm/AutoTuner/AutoTuningRemarkManager.h - Remark Manager ----------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This file declares the main interface for inputting and outputting +// remarks for AutoTuning. +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_AUTOTUNINGREMARKMANAGER_H +#define LLVM_AUTOTUNINGREMARKMANAGER_H + +#include "llvm/AutoTuner/AutoTuning.h" +#include "llvm/Remarks/RemarkStreamer.h" +#include "llvm/Support/Error.h" +#include +#include +#include + +namespace autotuning { +class AutoTuningRemarkManager { +public: + /// Read a list of parameters from input file. + /// \return Error::success() on success or the related Error otherwise. + static llvm::Error read(autotuning::AutoTuningEngine &E, + const std::string &InputName, + const std::string &RemarksFormat); + + /// Dump a list of CodeRegions as tuning opportunities into a file. + /// \return Error::success() on success or the related Error otherwise. + static llvm::Error dump(const autotuning::AutoTuningEngine &E, + const std::string &DirPath, + const std::string &RemarksFormat, + const std::string &RemarksPasses); +}; +} // namespace autotuning +#endif // LLVM_AUTOTUNINGREMARKMANAGER_H +#endif diff --git a/llvm/include/llvm/AutoTuner/AutoTuningRemarkStreamer.h b/llvm/include/llvm/AutoTuner/AutoTuningRemarkStreamer.h new file mode 100644 index 0000000000000000000000000000000000000000..0096139b12e9c930aec8c047706f2f00c93f4394 --- /dev/null +++ b/llvm/include/llvm/AutoTuner/AutoTuningRemarkStreamer.h @@ -0,0 +1,47 @@ +#if defined(ENABLE_AUTOTUNER) +// ===------------ llvm/AutoTuner/AutoTuningRemarkStreamer.h --------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +// Copyright (C) 2017-2022, Huawei Technologies Co., Ltd. All rights reserved.
+// +// ===---------------------------------------------------------------------===// +// +// This file contains the implementation of the conversion between AutoTuner +// CodeRegions and serializable remarks::Remark objects. +// +// ===---------------------------------------------------------------------===// + +#ifndef LLVM_AUTOTUNER_AUTOTUNINGREMARKSTREAMER_H +#define LLVM_AUTOTUNER_AUTOTUNINGREMARKSTREAMER_H + +#include "llvm/AutoTuner/AutoTuning.h" +#include "llvm/Remarks/Remark.h" +#include "llvm/Remarks/RemarkStreamer.h" +#include "llvm/Support/Error.h" +#include "llvm/Support/ToolOutputFile.h" +#include +#include + +namespace llvm { +/// Streamer for AutoTuner remarks which has logic for dealing with CodeRegions. +class AutoTuningRemarkStreamer { + remarks::RemarkStreamer &RS; + /// Convert CodeRegion into remark objects. + remarks::Remark toRemark(const autotuning::CodeRegion &CR); + +public: + AutoTuningRemarkStreamer(remarks::RemarkStreamer &RS) : RS(RS) {} + /// Emit a CodeRegion through the streamer. + void emit(const autotuning::CodeRegion &CR); + /// Set a pass filter based on a regex \p Filter. + /// Returns an error if the regex is invalid. 
+ Error setFilter(StringRef Filter); +}; +} // end namespace llvm + +#endif // LLVM_AUTOTUNER_AUTOTUNINGREMARKSTREAMER_H +#endif diff --git a/llvm/include/llvm/CodeGen/MachineBasicBlock.h b/llvm/include/llvm/CodeGen/MachineBasicBlock.h index 52388692c19664d6e5b13b2d327ed4554f6b272b..95ac9acf4e5e46dba968507b38b1d383de5a06b9 100644 --- a/llvm/include/llvm/CodeGen/MachineBasicBlock.h +++ b/llvm/include/llvm/CodeGen/MachineBasicBlock.h @@ -27,6 +27,9 @@ #include #include #include +#if defined(ENABLE_AUTOTUNER) +#include "llvm/AutoTuner/AutoTuning.h" +#endif namespace llvm { @@ -91,9 +94,19 @@ public: void deleteNode(MachineInstr *MI); }; +#if defined(ENABLE_AUTOTUNER) +class MachineBasicBlock + : public ilist_node_with_parent, + public autotuning::Container { +#else class MachineBasicBlock : public ilist_node_with_parent { +#endif public: +#if defined(ENABLE_AUTOTUNER) + void initCodeRegion() override; + uint64_t computeStructuralHash() override; +#endif /// Pair of physical register and lane mask. /// This is not simply a std::pair typedef because the members should be named /// clearly as they both have an integer type. 
diff --git a/llvm/include/llvm/IR/Function.h b/llvm/include/llvm/IR/Function.h index 93cf0d27e9a73e5ee2d3e604642fe5730f8c6bcd..c0db48ae17892083e3313bc19eb681bd1392e925 100644 --- a/llvm/include/llvm/IR/Function.h +++ b/llvm/include/llvm/IR/Function.h @@ -37,6 +37,9 @@ #include #include #include +#if defined(ENABLE_AUTOTUNER) +#include "llvm/AutoTuner/AutoTuning.h" +#endif namespace llvm { @@ -56,6 +59,24 @@ class User; class BranchProbabilityInfo; class BlockFrequencyInfo; +#if defined(ENABLE_AUTOTUNER) +class AutoTuningEnabledFunction : public autotuning::Container { +public: + AutoTuningEnabledFunction() = delete; + void initCodeRegion() override; + void setHot() { this->Hotness = autotuning::Hot; } + void setCold() { this->Hotness = autotuning::Cold; } + autotuning::HotnessType getHotness() const { return this->Hotness; } + uint64_t computeStructuralHash() override; + +private: + AutoTuningEnabledFunction(Function *F) { Func = F; }; + Function *Func; + autotuning::HotnessType Hotness = autotuning::Unknown; + friend class Function; +}; +#endif + class LLVM_EXTERNAL_VISIBILITY Function : public GlobalObject, public ilist_node { public: @@ -68,6 +89,13 @@ public: using arg_iterator = Argument *; using const_arg_iterator = const Argument *; +#if defined(ENABLE_AUTOTUNER) + // There is one-to-one correspondence between ATEFunction and the current + // Function object to avoid messing up the LLVM User and owned Use classes' + // memory layout. + AutoTuningEnabledFunction ATEFunction = AutoTuningEnabledFunction(this); +#endif + private: // Important things that make up a function! BasicBlockListType BasicBlocks; ///< The basic blocks @@ -128,6 +156,11 @@ public: void operator=(const Function&) = delete; ~Function(); +#if defined(ENABLE_AUTOTUNER) + // Return the auto-tuning enabled version of this Function object. 
+ AutoTuningEnabledFunction &getATEFunction() { return ATEFunction; } +#endif + // This is here to help easily convert from FunctionT * (Function * or // MachineFunction *) in BlockFrequencyInfoImpl to Function * by calling // FunctionT->getFunction(). @@ -840,7 +873,11 @@ public: /// AssemblyAnnotationWriter. void print(raw_ostream &OS, AssemblyAnnotationWriter *AAW = nullptr, bool ShouldPreserveUseListOrder = false, +#if defined(ENABLE_AUTOTUNER) + bool IsForDebug = false, bool PrintCompleteIR = false) const; +#else bool IsForDebug = false) const; +#endif /// viewCFG - This function is meant for use from the debugger. You can just /// say 'call F->viewCFG()' and a ghostview window should pop up from the diff --git a/llvm/include/llvm/IR/InstrTypes.h b/llvm/include/llvm/IR/InstrTypes.h index 6095b0a1be69cb3a4f5b6a0072cc5bf2a0db5ef6..dcc9bbee30fa25210563afa2e7804f719267ebc1 100644 --- a/llvm/include/llvm/IR/InstrTypes.h +++ b/llvm/include/llvm/IR/InstrTypes.h @@ -1169,6 +1169,23 @@ public: using OperandBundleDef = OperandBundleDefT; using ConstOperandBundleDef = OperandBundleDefT; +#if defined(ENABLE_AUTOTUNER) +//===----------------------------------------------------------------------===// +// AutoTuningEnabledCallSite Class +//===----------------------------------------------------------------------===// +class CallBase; +class AutoTuningEnabledCallSite : public autotuning::Container { +public: + AutoTuningEnabledCallSite() = delete; + void initCodeRegion() override; + uint64_t computeStructuralHash() override; + AutoTuningEnabledCallSite(CallBase *CallBase) { CB = CallBase; } + +private: + CallBase *CB; +}; +#endif + //===----------------------------------------------------------------------===// // CallBase Class //===----------------------------------------------------------------------===// @@ -1229,6 +1246,13 @@ protected: unsigned getNumSubclassExtraOperandsDynamic() const; public: +#if defined(ENABLE_AUTOTUNER) + // There is one-to-one correspondence 
between ATECallSite and CallBase class + // to enable auto-tuning. + std::unique_ptr ATECallSite = + std::make_unique(this); +#endif + using Instruction::getContext; /// Create a clone of \p CB with a different set of operand bundles and diff --git a/llvm/include/llvm/IR/Instructions.h b/llvm/include/llvm/IR/Instructions.h index 8d60384e1a32fc5c23ff139afbf95f97237a8aa3..9d638af6eeeffbccf58604455277deaa16e3cec3 100644 --- a/llvm/include/llvm/IR/Instructions.h +++ b/llvm/include/llvm/IR/Instructions.h @@ -3287,6 +3287,23 @@ struct OperandTraits : public VariadicOperandTraits { DEFINE_TRANSPARENT_OPERAND_ACCESSORS(BranchInst, Value) +#if defined(ENABLE_AUTOTUNER) +//===----------------------------------------------------------------------===// +// AutoTuningEnabledSwitchInst Class +//===----------------------------------------------------------------------===// +class SwitchInst; + +class AutoTuningEnabledSwitchInst : public autotuning::Container { +public: + AutoTuningEnabledSwitchInst() = delete; + void initCodeRegion() override; + uint64_t computeStructuralHash() override; + AutoTuningEnabledSwitchInst(SwitchInst *SwitchInst) { SI = SwitchInst; } + +private: + SwitchInst *SI; +}; +#endif //===----------------------------------------------------------------------===// // SwitchInst Class //===----------------------------------------------------------------------===// @@ -3332,6 +3349,13 @@ protected: public: void operator delete(void *Ptr) { User::operator delete(Ptr); } +#if defined(ENABLE_AUTOTUNER) + // There is one-to-one correspondence between ATESwitchInst and + // SwitchInst class to enable AutoTuner. 
+ std::unique_ptr ATESwitchInst = + std::make_unique(this); +#endif + // -2 static const unsigned DefaultPseudoIndex = static_cast(~0L-1); diff --git a/llvm/include/llvm/IR/Module.h b/llvm/include/llvm/IR/Module.h index 670a40b28eabbeaf58eb018b5a4aceb58bf1472c..904a450a18887c4325decce3b0242908a3451e70 100644 --- a/llvm/include/llvm/IR/Module.h +++ b/llvm/include/llvm/IR/Module.h @@ -38,6 +38,9 @@ #include #include #include +#if defined(ENABLE_AUTOTUNER) +#include "llvm/AutoTuner/AutoTuning.h" +#endif namespace llvm { diff --git a/llvm/include/llvm/IR/StructuralHash.h b/llvm/include/llvm/IR/StructuralHash.h index 1bdeb85afa3c50f0ee3331e4c520a7c7e00d011b..c0bcc8153eb8427552f4dd4d1d7ba14132bfe617 100644 --- a/llvm/include/llvm/IR/StructuralHash.h +++ b/llvm/include/llvm/IR/StructuralHash.h @@ -15,6 +15,9 @@ #define LLVM_IR_STRUCTURALHASH_H #include +#if defined(ENABLE_AUTOTUNER) +#include +#endif namespace llvm { @@ -24,6 +27,17 @@ class Module; uint64_t StructuralHash(const Function &F); uint64_t StructuralHash(const Module &M); +#if defined(ENABLE_AUTOTUNER) +class MachineBasicBlock; +class BasicBlock; +class CallBase; +class SwitchInst; + +uint64_t StructuralHash(const std::vector BBs); +uint64_t StructuralHash(const MachineBasicBlock &MBB); +uint64_t StructuralHash(const CallBase &CB); +uint64_t StructuralHash(const SwitchInst &SI); +#endif } // end namespace llvm #endif diff --git a/llvm/include/llvm/InitializePasses.h b/llvm/include/llvm/InitializePasses.h index c6fee47b464b952b58eb542078d89681045b242c..80bec2d82e247a16f1efb5360407560e2134f347 100644 --- a/llvm/include/llvm/InitializePasses.h +++ b/llvm/include/llvm/InitializePasses.h @@ -340,6 +340,11 @@ void initializeWasmEHPreparePass(PassRegistry&); void initializeWinEHPreparePass(PassRegistry&); void initializeWriteBitcodePassPass(PassRegistry&); void initializeXRayInstrumentationPass(PassRegistry&); +#if defined(ENABLE_AUTOTUNER) +void initializeAutotuningDumpLegacyPass(PassRegistry &); +void 
initializeAutoTuningCompileFunctionLegacyPass(PassRegistry &); +void initializeAutoTuningCompileModuleLegacyPass(PassRegistry &); +#endif } // end namespace llvm diff --git a/llvm/include/llvm/LinkAllPasses.h b/llvm/include/llvm/LinkAllPasses.h index 7420ea64e954356c50ae67ea8672420bad21061f..3a8ecb1399f110f55d9c092dbd31255b5cd8e123 100644 --- a/llvm/include/llvm/LinkAllPasses.h +++ b/llvm/include/llvm/LinkAllPasses.h @@ -54,6 +54,9 @@ #include "llvm/Transforms/Utils/UnifyFunctionExitNodes.h" #include "llvm/Transforms/Vectorize.h" #include +#if defined(ENABLE_AUTOTUNER) +#include "llvm/Transforms/Scalar/AutoTuningCompile.h" +#endif namespace { struct ForcePassLinking { @@ -93,6 +96,11 @@ namespace { (void) llvm::createInstSimplifyLegacyPass(); (void) llvm::createInstructionCombiningPass(); (void) llvm::createJMCInstrumenterPass(); +#if defined(ENABLE_AUTOTUNER) + (void) llvm::createAutotuningDumpPass(); + (void) llvm::createAutoTuningCompileFunctionLegacyPass(); + (void) llvm::createAutoTuningCompileModuleLegacyPass(); +#endif (void) llvm::createKCFIPass(); (void) llvm::createLCSSAPass(); (void) llvm::createLICMPass(); diff --git a/llvm/include/llvm/Remarks/Remark.h b/llvm/include/llvm/Remarks/Remark.h index a66f7ed73f2f50799738358ab187e4c0882a5abd..3bcc0c7104985b7c704e3f525ad61f94f4835e3c 100644 --- a/llvm/include/llvm/Remarks/Remark.h +++ b/llvm/include/llvm/Remarks/Remark.h @@ -20,6 +20,10 @@ #include "llvm/Support/raw_ostream.h" #include #include +#if defined(ENABLE_AUTOTUNER) +#include +#include +#endif namespace llvm { namespace remarks { @@ -47,6 +51,9 @@ struct Argument { StringRef Key; // FIXME: We might want to be able to store other types than strings here. StringRef Val; +#if defined(ENABLE_AUTOTUNER) + std::optional> VectorVal; +#endif // If set, the debug location corresponding to the value. 
std::optional Loc; @@ -65,6 +72,9 @@ enum class Type { Analysis, AnalysisFPCommute, AnalysisAliasing, +#if defined(ENABLE_AUTOTUNER) + AutoTuning, +#endif Failure, First = Unknown, Last = Failure @@ -105,6 +115,28 @@ struct Remark { /// Mangled name of the function that triggers the emssion of this remark. StringRef FunctionName; +#if defined(ENABLE_AUTOTUNER) + /// Type of the code region that the remark is associated with. + std::optional CodeRegionType; + + /// Configuration value for generating the same baseline binary associated + /// with this remark. + std::optional> BaselineConfig; + + /// Hash of the code region that the remark is associated with. + std::optional CodeRegionHash; + + /// Configs values passed to AutoTuner for dynamic setting of search space + /// for code regions. + std::optional>> + AutoTunerOptions; + + /// Invocation/Registering of Optimization Pass in the compilation pipeline. + /// It is used to differentiate between different invocations of same + /// optimization pass. + std::optional Invocation; +#endif + /// The location in the source file of the remark. std::optional Loc; diff --git a/llvm/include/llvm/Support/CommandLine.h b/llvm/include/llvm/Support/CommandLine.h index d2079fead66808cec640007a111dc502fd61bcc3..c59dba2749f0bf8d186f087c808b521497398c8e 100644 --- a/llvm/include/llvm/Support/CommandLine.h +++ b/llvm/include/llvm/Support/CommandLine.h @@ -40,6 +40,9 @@ #include #include +#if defined(ENABLE_AUTOTUNER) +#include +#endif namespace llvm { namespace vfs { @@ -72,6 +75,20 @@ bool ParseCommandLineOptions(int argc, const char *const *argv, const char *EnvVar = nullptr, bool LongOptionsUseDoubleDash = false); +#if defined(ENABLE_AUTOTUNER) +// It will parse AutoTuner options (LLVMParams & ProgramParams) and add them as +// command line flags for the compilation process. These options are suggested +// by AutoTuner during tuning flow. This function will always be called after +// AutoTuner initialization. 
+// Returns true on success. Otherwise, this will print the error message to +// stderr and exit. +bool ParseAutoTunerOptions( + std::unordered_map LLVMParams, + std::unordered_map ProgramParams, + StringRef Overview = "", raw_ostream *Errs = nullptr, + const char *EnvVar = nullptr, bool LongOptionsUseDoubleDash = false); +#endif + // Function pointer type for printing version information. using VersionPrinterTy = std::function; diff --git a/llvm/include/llvm/Transforms/Scalar.h b/llvm/include/llvm/Transforms/Scalar.h index aaba710cfde61ccf9efc47821d248acd941e3783..e69beeade94785c3e1ba29ba0a66f75350d70687 100644 --- a/llvm/include/llvm/Transforms/Scalar.h +++ b/llvm/include/llvm/Transforms/Scalar.h @@ -16,6 +16,10 @@ #include "llvm/Transforms/Utils/SimplifyCFGOptions.h" #include +#if defined(ENABLE_AUTOTUNER) +#include "llvm/Pass.h" +#include +#endif namespace llvm { @@ -299,6 +303,19 @@ Pass *createLoopSimplifyCFGPass(); // FunctionPass *createInstSimplifyLegacyPass(); +#if defined(ENABLE_AUTOTUNER) +//===--------------------------------------------------------------------===// +// +// createAutotuningCompilePass - It writes IR files with -fautotune-generate +// for autotuning flow. It also enables/disables the execution of optimization +// passes in subsequent compilations (with -fautotune) based on autotuning +// methodology and available opportunities. 
+// +FunctionPass * +createAutoTuningCompileFunctionLegacyPass(std::string Pass = "unknown"); +ModulePass * +createAutoTuningCompileModuleLegacyPass(std::string Pass = "unknown"); +#endif //===----------------------------------------------------------------------===// // diff --git a/llvm/include/llvm/Transforms/Scalar/AutoTuningCompile.h b/llvm/include/llvm/Transforms/Scalar/AutoTuningCompile.h new file mode 100644 index 0000000000000000000000000000000000000000..2cbb48f336efcaaf7968981b1a5e14cedc950547 --- /dev/null +++ b/llvm/include/llvm/Transforms/Scalar/AutoTuningCompile.h @@ -0,0 +1,170 @@ +#if defined(ENABLE_AUTOTUNER) +//===---------------- AutoTuningCompile.h - Auto-Tuning -------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +// Copyright (C) 2017-2022, Huawei Technologies Co., Ltd. All rights reserved. +// +//===----------------------------------------------------------------------===// +// +/// \file +/// This file declares the interface for AutoTuning Incremental Compilation. +/// Incremental compilation requires two passes 1) Module Pass and 2) Function +/// Pass for legacy pass manager. It requires an additional Loop Pass for new +/// pass manager. +/// AutoTuningOptPassGate class is also defined here which is used to enable/ +/// disable the execution of optimization passes for the compilation pipeline. +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_AUTOTUNER_AUTOTUNING_COMPILE_H_ +#define LLVM_AUTOTUNER_AUTOTUNING_COMPILE_H_ + +#include "llvm/Analysis/LoopAnalysisManager.h" +#include "llvm/Analysis/LoopInfo.h" +#include "llvm/Analysis/LoopPass.h" +#include "llvm/IR/OptBisect.h" +#include "llvm/IR/PassManager.h" +#include "llvm/Pass.h" +#include "llvm/Transforms/Scalar/LoopPassManager.h" + +namespace llvm { + +class Pass; + +// Skips or runs optimization passes. 
+class AutoTuningOptPassGate : public OptPassGate { +public: + explicit AutoTuningOptPassGate(bool Skip = false) : Skip(Skip) {} + + bool shouldRunPass(const StringRef PassName, + StringRef IRDescription) override; + bool isEnabled() const override { return true; } + bool checkPass(const StringRef PassName, const StringRef TargetDesc); + void setSkip(bool Skip) { this->Skip = Skip; } + bool getSkip() const { return Skip; } + +private: + bool Skip; +}; + +// Returns a static AutoTuningOptPassGate object which will be used to register +// CallBack for OptBisect instrumentation. +// It will also be used by AutoTuningCompile passes to enable/disable +// optimization passes. +AutoTuningOptPassGate &getAutoTuningOptPassGate(); + +class AutoTuningCompileModule { +public: + explicit AutoTuningCompileModule(std::string Pass = "unknown"); + bool run(Module &M); + // Write IR files for each module to be re-used in subsequent compilations + // for autotuning cycles. It only works with -fautotune-generate. + void writeIRFiles(Module &M) const; + // Enable/Disable execution of optimization passes in subsequent compilations + // based on autotuning methodology and available opportunities. 
It Only works + // with -fautotune + bool modifyCompilationPipeline(Module &M) const; + + static void setSkipCompilation(bool Option) { SkipCompilation = Option; } + static bool getSkipCompilation() { return SkipCompilation; } + +private: + static bool SkipCompilation; + std::string Pass = ""; +}; + +class AutoTuningCompileModuleLegacy : public ModulePass { +public: + static char ID; + explicit AutoTuningCompileModuleLegacy(std::string Pass = "unknown"); + bool runOnModule(Module &M) override; + StringRef getPassName() const override; + void getAnalysisUsage(AnalysisUsage &AU) const override { + AU.setPreservesAll(); + } + +private: + std::string Pass = ""; +}; + +class AutoTuningCompileModulePass + : public PassInfoMixin { +public: + explicit AutoTuningCompileModulePass(std::string Pass = "unknown") + : Pass(Pass){}; + PreservedAnalyses run(Module &M, ModuleAnalysisManager &); + +private: + std::string Pass = ""; +}; + +class AutoTuningCompileFunction { +public: + explicit AutoTuningCompileFunction(std::string Pass = "unknown"); + bool run(Function &F); + // Write IR files for each module to be re-used in subsequent compilations + // for autotuning cycles. It only works with -fautotune-generate. + void writeIRFiles(Module &M); + // Enable/Disable execution of optimization passes in subsequent compilations + // based on autotuning methodology and available opportunities. It Only works + // with -fautotune + bool modifyCompilationPipeline(Function &F); + +private: + // A module may have multiple functions; decision to enable/disable + // execution of an optimization pass will be made for the first function and + // will be used for all of the functions in the module. + // 'SkipDecision' will be set once the decision is made for a specific 'Pass'. + bool SkipDecision = false; + + // A module may have multiple functions; IR file will be written once for the + // entire module for a specific 'Pass'. 
+ bool IsModuleWritten = false; + std::string Pass = ""; +}; + +class AutoTuningCompileFunctionLegacy : public FunctionPass { +public: + static char ID; + explicit AutoTuningCompileFunctionLegacy(std::string Pass = "unknown"); + bool runOnFunction(Function &F) override; + StringRef getPassName() const override; + void getAnalysisUsage(AnalysisUsage &AU) const override { + AU.setPreservesAll(); + } + +private: + std::string Pass = ""; +}; + +class AutoTuningCompileFunctionPass + : public PassInfoMixin { +public: + explicit AutoTuningCompileFunctionPass(std::string Pass = "unknown") + : Pass(Pass){}; + PreservedAnalyses run(Function &F, FunctionAnalysisManager &AM); + +private: + std::string Pass = ""; +}; + +class AutoTuningCompileLoopPass + : public PassInfoMixin { +public: + explicit AutoTuningCompileLoopPass(std::string Pass = "unknown") + : Pass(Pass){}; + PreservedAnalyses run(Loop &L, LoopAnalysisManager &AM, + LoopStandardAnalysisResults &AR, LPMUpdater &U); + +private: + std::string Pass = ""; +}; + +} // end namespace llvm + +#endif /* LLVM_AUTOTUNER_AUTOTUNING_COMPILE_H_ */ +#endif diff --git a/llvm/include/llvm/Transforms/Utils/UnrollLoop.h b/llvm/include/llvm/Transforms/Utils/UnrollLoop.h index 4f3010965b591df7e8a2bb0d63ab9ff2b9a35fb2..e1cccf417898ba11e1e0ec6c614a79705cc1c913 100644 --- a/llvm/include/llvm/Transforms/Utils/UnrollLoop.h +++ b/llvm/include/llvm/Transforms/Utils/UnrollLoop.h @@ -108,7 +108,11 @@ bool computeUnrollCount(Loop *L, const TargetTransformInfo &TTI, unsigned TripMultiple, unsigned LoopSize, TargetTransformInfo::UnrollingPreferences &UP, TargetTransformInfo::PeelingPreferences &PP, +#if defined(ENABLE_AUTOTUNER) + bool &UseUpperBound, unsigned int Invocation = 0); +#else bool &UseUpperBound); +#endif void simplifyLoopAfterUnroll(Loop *L, bool SimplifyIVs, LoopInfo *LI, ScalarEvolution *SE, DominatorTree *DT, diff --git a/llvm/lib/Analysis/AutotuningDump.cpp b/llvm/lib/Analysis/AutotuningDump.cpp new file mode 100644 index 
0000000000000000000000000000000000000000..81b2bbead70e33f6262588a60463bde8a1af5e68 --- /dev/null +++ b/llvm/lib/Analysis/AutotuningDump.cpp @@ -0,0 +1,265 @@ +#if defined(ENABLE_AUTOTUNER) +// ===-- AutotuningDump.cpp - Auto-Tuning---------------------------------===// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +// ===--------------------------------------------------------------------===// +// +// This file contains a pass that collects the IR of tuned regions and stores +// it in predetermined locations, to be used later by autotuning ML guidance. +// +// ===--------------------------------------------------------------------===// +#include "llvm/Analysis/AutotuningDump.h" +#include "llvm/Analysis/Passes.h" +#include "llvm/AutoTuner/AutoTuning.h" +#include "llvm/IR/LegacyPassManager.h" +#include "llvm/InitializePasses.h" +#include "llvm/Pass.h" +#include "llvm/Support/CommandLine.h" +#include "llvm/Support/Path.h" +#include "llvm/Support/Process.h" +#include "llvm/Support/raw_ostream.h" +#include + +using namespace llvm; + +#define DEBUG_TYPE "autotuning-dump" + +enum AutotuningDumpOpt { whole_modules, functions, loops }; + +// Enable Debug Options to be specified on the command line +cl::opt AutotuningDumpMode( + "autotuning-dump-mode", cl::desc("Choose autotuning dump mode:"), + cl::init(whole_modules), + cl::values(clEnumVal(whole_modules, "dump each module in its own file"), + clEnumVal(functions, "dump each function in its own file"), + clEnumVal(loops, "dump each loop in its own file"))); + +AutotuningDump::AutotuningDump(bool IncrementalCompilation) { + // Check if the environment variable AUTOTUNE_DATADIR is set.
+ IsIncrementalCompilation = IncrementalCompilation; + AutoTuneDirPath = "autotune_datadir"; + if (std::optional MaybePath = + llvm::sys::Process::GetEnv("AUTOTUNE_DATADIR")) + AutoTuneDirPath = *MaybePath; +} + +int AutotuningDump::getConfigNumber() { + auto ConfigNumOrErr = autotuning::Engine.getConfigNumber(); + if (ConfigNumOrErr) + return *ConfigNumOrErr; + else { + report_fatal_error("Invalid/missing Autotuner configuration ID"); + return -1; + } +} + +void AutotuningDump::dumpToStream(llvm::raw_ostream &os, const Loop &L) const { + L.print(os); +} + +void AutotuningDump::dumpToStream(llvm::raw_ostream &os, + const Function &F) const { + F.print(os, /*AAW*/ nullptr, /*ShouldPreserveUseListOrder*/ false, + /*IsForDebug*/ false, /*PrintCompleteIR*/ true); +} + +// Create appropriate file. File will contain AbsolutePath/FileName. +std::unique_ptr AutotuningDump::createFile(const Twine &File) { + std::error_code EC; + return std::make_unique((File).str(), EC, + sys::fs::CD_CreateAlways, + sys::fs::FA_Write, sys::fs::OF_None); +} + +std::string AutotuningDump::getDirectoryName(const std::string File) const { + std::string DirectoryName = AutoTuneDirPath; + if (!autotuning::Engine.isMLEnabled()) + DirectoryName += "/IR_files"; + + DirectoryName = DirectoryName + "/" + File + "/"; + + // Create directory if not already present.
+ if (std::error_code EC = sys::fs::create_directories(DirectoryName)) + errs() << "could not create directory: " << DirectoryName << ": " + << EC.message(); + + return DirectoryName; +} + +std::string AutotuningDump::getFileName(std::string FilePath) { + if (autotuning::Engine.isMLEnabled()) + return std::to_string(this->getConfigNumber()) + ".ll"; + std::replace(FilePath.begin(), FilePath.end(), '/', '_'); + return FilePath + ".ll"; +} + +void AutotuningDump::dumpModule(Module &M) { + std::unique_ptr fptr; + LLVM_DEBUG(dbgs() << "AutotuningDump: Dump module IR files.\n"); + if (IsIncrementalCompilation) { + std::string Filename = M.getSourceFileName(); + llvm::SmallString<128> FilenameVec = StringRef(Filename); + llvm::sys::fs::make_absolute(FilenameVec); + size_t Pos = FilenameVec.rfind("."); + if (Pos != std::string::npos) { + FilenameVec.pop_back_n(FilenameVec.size() - Pos); + FilenameVec.append(".ll"); + } + fptr = createFile(FilenameVec); + } else { + std::string File = llvm::sys::path::filename(M.getName()).str(); + std::string DirectoryName = getDirectoryName(File); + std::string FileName = getFileName(M.getName().str()); + fptr = createFile(DirectoryName + FileName); + } + + M.print(*fptr, nullptr, true, false); +} + +void AutotuningDump::dumpFunctions(Module &M) { + std::string FilePath = M.getName().str(); + std::replace(FilePath.begin(), FilePath.end(), '/', '_'); + std::string DirectoryName = getDirectoryName(FilePath); + for (Function &F : M.getFunctionList()) { // go through all functions + if (F.isDeclaration() || F.empty()) + continue; + + AutoTuningEnabledFunction *AutotuneFunc = &F.getATEFunction(); + assert(AutotuneFunc); + autotuning::Engine.initContainer(AutotuneFunc, "autotuning-dump", + F.getName(), false); + std::string FuncName = F.getName().str(); + // check the whole function + if (AutotuneFunc->requiresIRDump(true)) { + auto fptr = createFile(DirectoryName + Twine(FuncName) + ".ll"); + this->dumpToStream(*fptr, F); + } + } +} + +void 
AutotuningDump::dumpLoops(Module &M, + function_ref GetLI) { + for (Function &F : M) { + // Nothing to do for declarations. + if (F.isDeclaration() || F.empty()) + continue; + + LoopInfo &LI = GetLI(F); + for (auto &L : LI.getLoopsInPreorder()) { + Function *Func = nullptr; + StringRef FuncName = ""; + if (!L->isInvalid()) + Func = L->getHeader()->getParent(); + if (Func) + FuncName = Func->getName(); + + autotuning::Engine.initContainer(L, "autotuning-dump", FuncName, false); + if (L->requiresIRDump()) { + std::string FuncName = L->getCodeRegion().getFuncName(); + unsigned SourceLine = L->getCodeRegion().getSourceLoc().SourceLine; + std::string DirectoryName = AutoTuneDirPath + "/" + + llvm::sys::path::filename(FuncName).str() + + "_loop_" + std::to_string(SourceLine); + std::string FileName = std::to_string(this->getConfigNumber()) + ".ll"; + auto fptr = createFile(DirectoryName + "/" + FileName); + this->dumpToStream(*fptr, *L); + } + } + } +} + +bool AutotuningDump::run(Module &M, + function_ref GetLI) { + // Change to absolute path. + SmallString<256> OutputPath = StringRef(AutoTuneDirPath); + sys::fs::make_absolute(OutputPath); + + // Create the output directory if it does not exist.
+ if (std::error_code EC = sys::fs::create_directories(OutputPath)) { + llvm::errs() << (make_error( + "could not create directory: " + Twine(OutputPath) + ": " + + EC.message(), + EC)); + return false; + } + + if (IsIncrementalCompilation) { + LLVM_DEBUG( + dbgs() + << "AutotuningDump: IR files writing for incremental compilation.\n"); + dumpModule(M); + return false; + } + + switch (AutotuningDumpMode) { + case whole_modules: + dumpModule(M); + break; + case functions: + dumpFunctions(M); + break; + case loops: + dumpLoops(M, GetLI); + } + + return false; +} + +AutotuningDumpLegacy::AutotuningDumpLegacy(bool IncrementalCompilation) + : ModulePass(AutotuningDumpLegacy::ID) { + IsIncrementalCompilation = IncrementalCompilation; + initializeAutotuningDumpLegacyPass(*PassRegistry::getPassRegistry()); +} + +bool AutotuningDumpLegacy::runOnModule(Module &M) { + if (!autotuning::Engine.isDumpEnabled()) + return false; + + auto GetLI = [this](Function &F) -> LoopInfo & { + return getAnalysis(F).getLoopInfo(); + }; + + AutotuningDump Impl(IsIncrementalCompilation); + return Impl.run(M, GetLI); +} + +StringRef AutotuningDumpLegacy::getPassName() const { + return "Autotuning Dump"; +} + +void AutotuningDumpLegacy::getAnalysisUsage(AnalysisUsage &AU) const { + AU.setPreservesAll(); + AU.addRequired(); +} + +char AutotuningDumpLegacy::ID = 0; +INITIALIZE_PASS_BEGIN(AutotuningDumpLegacy, "autotuning-dump", + "Dump IR for Autotuned Code Regions", false, false) +INITIALIZE_PASS_DEPENDENCY(LoopInfoWrapperPass) +INITIALIZE_PASS_END(AutotuningDumpLegacy, "autotuning-dump", + "Dump IR for Autotuned Code Regions", false, false) + +ModulePass *llvm::createAutotuningDumpPass() { + return new AutotuningDumpLegacy(); +} + +AnalysisKey AutotuningDumpAnalysis::Key; + +AutotuningDumpAnalysis::Result +AutotuningDumpAnalysis::run(Module &M, ModuleAnalysisManager &AM) { + if (!autotuning::Engine.isDumpEnabled()) + return false; + + auto &FAM = AM.getResult(M).getManager(); + auto GetLI = 
[&FAM](Function &F) -> LoopInfo & { + return FAM.getResult(F); + }; + + AutotuningDump Impl(IsIncrementalCompilation); + Impl.run(M, GetLI); + return false; +} +#endif diff --git a/llvm/lib/Analysis/CMakeLists.txt b/llvm/lib/Analysis/CMakeLists.txt index 4a1797c42789a0d94e01a6b194ed20212bbc77fa..9c6a70f0221f769742c7fd3f193cda0f7d3f8985 100644 --- a/llvm/lib/Analysis/CMakeLists.txt +++ b/llvm/lib/Analysis/CMakeLists.txt @@ -30,6 +30,7 @@ add_llvm_component_library(LLVMAnalysis Analysis.cpp AssumeBundleQueries.cpp AssumptionCache.cpp + AutotuningDump.cpp BasicAliasAnalysis.cpp BlockFrequencyInfo.cpp BlockFrequencyInfoImpl.cpp @@ -153,6 +154,7 @@ add_llvm_component_library(LLVMAnalysis ${MLLinkDeps} LINK_COMPONENTS + AutoTuner BinaryFormat Core Object diff --git a/llvm/lib/Analysis/InlineAdvisor.cpp b/llvm/lib/Analysis/InlineAdvisor.cpp index e2480d51d372b6eeb975cb849058dd868f45134d..f6b3c14a03459e72991c9657ee0ee7fcb32d2d7c 100644 --- a/llvm/lib/Analysis/InlineAdvisor.cpp +++ b/llvm/lib/Analysis/InlineAdvisor.cpp @@ -383,15 +383,27 @@ llvm::shouldInline(CallBase &CB, Function *Callee = CB.getCalledFunction(); Function *Caller = CB.getCaller(); +#if defined(ENABLE_AUTOTUNER) + // Get the code Region to add BaselineConfig values for inline + const autotuning::CodeRegion &CR = CB.ATECallSite.get()->getCodeRegion(); + static const std::string ForceInlineParamStr = "ForceInline"; +#endif + if (IC.isAlways()) { LLVM_DEBUG(dbgs() << " Inlining " << inlineCostStr(IC) << ", Call: " << CB << "\n"); +#if defined(ENABLE_AUTOTUNER) + autotuning::Engine.addOpportunity(CR, {{ForceInlineParamStr, "1"}}); +#endif return IC; } if (!IC) { LLVM_DEBUG(dbgs() << " NOT Inlining " << inlineCostStr(IC) << ", Call: " << CB << "\n"); +#if defined(ENABLE_AUTOTUNER) + autotuning::Engine.addOpportunity(CR, {{ForceInlineParamStr, "0"}}); +#endif if (IC.isNever()) { ORE.emit([&]() { return OptimizationRemarkMissed(DEBUG_TYPE, "NeverInline", Call) @@ -417,6 +429,9 @@ llvm::shouldInline(CallBase &CB, 
LLVM_DEBUG(dbgs() << " NOT Inlining: " << CB << " Cost = " << IC.getCost() << ", outer Cost = " << TotalSecondaryCost << '\n');
+#if defined(ENABLE_AUTOTUNER)
+        autotuning::Engine.addOpportunity(CR, {{ForceInlineParamStr, "0"}});
+#endif
         ORE.emit([&]() {
           return OptimizationRemarkMissed(DEBUG_TYPE,
                                           "IncreaseCostInOtherContexts", Call)
@@ -430,6 +445,9 @@ llvm::shouldInline(CallBase &CB,
   LLVM_DEBUG(dbgs() << " Inlining " << inlineCostStr(IC) << ", Call: " << CB
                     << '\n');
+#if defined(ENABLE_AUTOTUNER)
+  autotuning::Engine.addOpportunity(CR, {{ForceInlineParamStr, "1"}});
+#endif
   return IC;
 }
diff --git a/llvm/lib/Analysis/InlineCost.cpp b/llvm/lib/Analysis/InlineCost.cpp
index a2f46edcf5ef908250afcb9729093055bb4d884f..9f8f57865de27257470cbd2bb27dc7ba2e88587e 100644
--- a/llvm/lib/Analysis/InlineCost.cpp
+++ b/llvm/lib/Analysis/InlineCost.cpp
@@ -162,6 +162,14 @@ static cl::opt DisableGEPConstOperand(
     "disable-gep-const-evaluation", cl::Hidden, cl::init(false),
     cl::desc("Disables evaluation of GetElementPtr with constant operands"));
+#if defined(ENABLE_AUTOTUNER)
+static cl::opt
+    EnableLocalCallSiteTuning("auto-tuning-enable-local-callsite-tuning",
+                              cl::init(false), cl::Hidden,
+                              cl::desc("Enable AutoTuning for local callsites "
+                                       "as well."));
+#endif
+
 namespace llvm {
 std::optional getStringFnAttrAsInt(const Attribute &Attr) {
   if (Attr.isValid()) {
@@ -2990,6 +2998,27 @@ InlineCost llvm::getInlineCost(
     return llvm::InlineCost::getNever(UserDecision->getFailureReason());
   }
+#if defined(ENABLE_AUTOTUNER)
+  if (autotuning::Engine.isEnabled() && Call.getCaller() &&
+      (!Callee->hasLocalLinkage() || EnableLocalCallSiteTuning)) {
+    bool ForceInline = false;
+    bool Found = false;
+
+    autotuning::Engine.initContainer(Call.ATECallSite.get(), "inline",
+                                     Call.getCaller()->getName(),
+                                     /* addOpportunity */ false);
+
+    Found = Call.ATECallSite->lookUpParams("ForceInline", ForceInline);
+
+    if (Found) {
+      if (ForceInline)
+        return llvm::InlineCost::getAlways("Force inlined by auto-tuning");
+      else
+        return llvm::InlineCost::getNever("Force non-inlined by auto-tuning");
+    }
+  }
+#endif
+
   LLVM_DEBUG(llvm::dbgs() << " Analyzing call of " << Callee->getName()
                           << "... (caller:" << Call.getCaller()->getName()
                           << ")\n");
diff --git a/llvm/lib/Analysis/LoopInfo.cpp b/llvm/lib/Analysis/LoopInfo.cpp
index 60a72079e864c1ef8a2500097c0cd3845f6518dd..36aca73ee675ec791482ba46b73deb4991a79af0 100644
--- a/llvm/lib/Analysis/LoopInfo.cpp
+++ b/llvm/lib/Analysis/LoopInfo.cpp
@@ -37,6 +37,10 @@
 #include "llvm/Support/CommandLine.h"
 #include "llvm/Support/GenericLoopInfoImpl.h"
 #include "llvm/Support/raw_ostream.h"
+#if defined(ENABLE_AUTOTUNER)
+#include "llvm/AutoTuner/AutoTuning.h"
+#include "llvm/IR/StructuralHash.h"
+#endif
 using namespace llvm;
 // Explicitly instantiate methods in LoopInfoImpl.h for IR-level Loops.
@@ -663,6 +667,61 @@ Loop::LocRange Loop::getLocRange() const {
   return LocRange();
 }
+#if defined(ENABLE_AUTOTUNER)
+// Returns the structural hash of the basic blocks that make up this loop.
+uint64_t Loop::computeStructuralHash() {
+  std::vector BBs = getBlocks();
+  return StructuralHash(BBs);
+}
+
+// Build the autotuning::CodeRegion describing this loop: its name (taken from
+// the header block), the enclosing function, the start location, the number
+// of non-debug instructions and the hotness of the enclosing function.
+void Loop::initCodeRegion() {
+  std::string LoopName;
+  // Use the header's name as the loop name.
+  if (BasicBlock *Header = getHeader()) {
+    if (Header->hasName()) {
+      LoopName = Header->getName().str();
+    }
+    // If the header doesn't have a name, use the label that printAsOperand
+    // produces for this header.
+    else {
+      std::string Str;
+      llvm::raw_string_ostream RSO(Str);
+      Header->printAsOperand(RSO);
+      LoopName = RSO.str();
+    }
+  } else {
+    LoopName = "";
+  }
+
+  // NOTE(review): the branch above tolerates a null header, but the header is
+  // dereferenced unconditionally here - confirm getHeader() cannot be null.
+  Function *F = this->getHeader()->getParent();
+  StringRef FuncName = F->getName();
+
+  // Init the CodeRegion. The function name is converted with str() because
+  // StringRef::data() is not guaranteed to point at a null-terminated string.
+  autotuning::CodeRegion CR = autotuning::CodeRegion(
+      LoopName, FuncName.str(), autotuning::CodeRegionType::Loop,
+      this->getStartLoc());
+  // Compute the number of non-debug IR instructions in this loop.
+  unsigned TotalNumInstrs = 0;
+  for (const BasicBlock *BB : this->getBlocks()) {
+    unsigned NumInstrs = std::distance(BB->instructionsWithoutDebug().begin(),
+                                       BB->instructionsWithoutDebug().end());
+    TotalNumInstrs += NumInstrs;
+  }
+  CR.setSize(TotalNumInstrs);
+  // Compute hotness.
+  autotuning::HotnessType Hotness = F->ATEFunction.getHotness();
+  CR.setHotness(Hotness);
+
+  this->setCodeRegion(CR);
+}
+#endif
+
 #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
 LLVM_DUMP_METHOD void Loop::dump() const { print(dbgs()); }
diff --git a/llvm/lib/AutoTuner/AutoTuning.cpp b/llvm/lib/AutoTuner/AutoTuning.cpp
new file mode 100644
index 0000000000000000000000000000000000000000..1f09f06d84a2ddb4f51206a7427763369e3eb4ee
--- /dev/null
+++ b/llvm/lib/AutoTuner/AutoTuning.cpp
@@ -0,0 +1,705 @@
+#if defined(ENABLE_AUTOTUNER)
+//===-- AutoTuning.cpp - Auto-Tuning --------------------------------------===//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines Auto Tuning related functions, models and interfaces.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/AutoTuner/AutoTuning.h"
+#include "llvm/ADT/STLExtras.h"
+#include "llvm/ADT/StringRef.h"
+#include "llvm/AutoTuner/AutoTuningRemarkManager.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/Error.h"
+#include "llvm/Support/Process.h"
+
+// Enable debug messages for AutoTuning.
+#define DEBUG_TYPE "autotuning"
+
+using namespace llvm;
+
+// defined in 'lib/Remarks/YAMLRemarkParser.cpp'.
+extern cl::opt OmitAutotuningMetadata;
+
+// -auto-tuning-input - Command line option to specify the input file.
+static cl::opt InputFile("auto-tuning-input", cl::Hidden,
+                         cl::desc("Specify the input file"));
+
+// -auto-tuning-opp - Command line option to specify the output directory of
+// tuning opportunities.
+static cl::opt OutputOppDir(
+    "auto-tuning-opp", cl::Hidden,
+    cl::desc("Specify the output directory of tuning opportunities"));
+
+// -auto-tuning-pass-filter - Regular expression handed to the remark manager
+// when tuning opportunities are dumped.
+static cl::opt
+    RemarksPasses("auto-tuning-pass-filter", cl::Hidden,
+                  cl::desc("Only dump auto-tuning remarks from passes whose "
+                           "names match the given regular expression"),
+                  cl::value_desc("regex"));
+
+// -autotuning-project-dir - Prefix removed from the absolute module path when
+// the module ID is computed.
+static cl::opt
+    ProjectDir("autotuning-project-dir", cl::Hidden, cl::init(""),
+               cl::desc("Specify project base dir to make code region name "
+                        "relative to base dir. This operation will only be "
+                        "applied for coarse-grain code regions."));
+
+// -auto-tuning-config-id - Command line option to specify the config number
+// being used for compilation. Required only for ML guidance feature.
+static cl::opt CFGNumber(
+    "auto-tuning-config-id", cl::Hidden,
+    cl::desc(
+        "Specify the auto-tuning configuration ID used in this compilation."));
+
+// -auto-tuning-remark-format - Serialization format used when tuning
+// opportunities are dumped.
+static cl::opt OutputFormat(
+    "auto-tuning-remark-format", cl::Hidden,
+    cl::desc("The format used for auto-tuning remarks (default: YAML)"),
+    cl::value_desc("format"), cl::init("yaml"));
+
+// AutoTuner incremental compilation options.
+cl::opt AutoTuningCompileMode(
+    "auto-tuning-compile-mode", cl::Hidden, cl::init(Inactive),
+    cl::desc("AutoTuner: Choose incremental compilation mode."),
+    cl::values(clEnumVal(Inactive,
+                         "AutoTuner: Disable incremental compilation."),
+               clEnumVal(CoarseGrain, "AutoTuner: Enable incremental "
+                                      "compilation for coarse grain tuning."),
+               clEnumVal(FineGrain, "AutoTuner: Enable incremental compilation "
+                                    "for fine grain tuning."),
+               clEnumVal(Basic, "AutoTuner: Enable incremental compilation for "
+                                "any kind of code region.")));
+
+static cl::opt
+    EnableAutoTuningDump("enable-autotuning-dump", cl::Hidden, cl::init(false),
+                         cl::desc("Enable AutoTuningDump Pass"));
+
+static cl::opt
+    ThinLTOTuning("autotuning-thin-lto", cl::Hidden, cl::init(false),
+                  cl::desc("AutoTuner enabled in ThinLTO mode."));
+
+namespace autotuning {
+
+static cl::list AutotuningOutputFilter(
+    "auto-tuning-type-filter", cl::Hidden, cl::CommaSeparated,
+    cl::desc(
+        "Select types of code regions to dump auto-tuning opportunities for:"),
+    cl::values(clEnumVal(LLVMParam, "LLVMParam code regions only"),
+               clEnumVal(ProgramParam, "ProgramParam code regions only"),
+               clEnumVal(CallSite, "CallSite code regions only"),
+               clEnumVal(Function, "Function code regions only"),
+               clEnumVal(Loop, "Loop code regions only"),
+               clEnumVal(MachineBasicBlock,
+                         "Machine basic block code regions only"),
+               clEnumVal(Switch, "Switch code regions only"),
+               clEnumVal(Other, "All other types of code regions")));
+
+static cl::list AutotuningFunctionFilter(
+    "auto-tuning-function-filter", cl::Hidden, cl::CommaSeparated,
+    cl::desc("Apply code region filtering based on function names"));
+
+static const cl::opt ExcludeColdCodeRegion(
+    "auto-tuning-exclude-cold", cl::Hidden, cl::init(true),
+    cl::desc("Use profile data to prune cold code regions from auto-tuning"));
+
+static const cl::opt CodeRegionMatchingWithHash(
+    "auto-tuning-code-region-matching-hash", cl::Hidden, cl::init(true),
+    cl::desc("Use IR hashing to match the Code Regions"));
+
+static const cl::opt HotCodeRegionOnly(
+    "auto-tuning-hot-only", cl::Hidden, cl::init(false),
+    cl::desc(
+        "Use profile data to include hot code regions only from auto-tuning"));
+
+static const cl::opt
+    SizeThreshold("auto-tuning-size-threshold", cl::Hidden, cl::init(0),
+                  cl::desc("Prune small code regions from auto-tuning with a "
+                           "size smaller than the threshold"));
+
+// Substitute the placeholder "unnamed" for an empty name.
+static inline const std::string generateName(const std::string &Name) {
+  if (Name.empty())
+    return "unnamed";
+  else
+    return Name;
+}
+
+//===----------------------------------------------------------------------===//
+// CodeRegion implementation
+CodeRegion::CodeRegion(const CodeRegionType Type) : Type(Type) {}
+
+// Construct a CodeRegion whose source location is taken from a DebugLoc; the
+// location is left untouched when DL is not set.
+CodeRegion::CodeRegion(const std::string &Name, const std::string &FuncName,
+                       const CodeRegionType &Type, const DebugLoc &DL,
+                       const DynamicOptions DO) {
+  this->Name = generateName(Name);
+  this->FuncName = generateName(FuncName);
+  this->Type = Type;
+  this->StringType = getTypeAsString(Type);
+  if (DL) {
+    StringRef File = DL->getFilename();
+    unsigned Line = DL->getLine();
+    unsigned Col = DL->getColumn();
+    this->Location = SourceLocation{File.str(), Line, Col};
+  }
+  this->AutoTunerOptions = DO;
+}
+
+// Construct a CodeRegion from an already built SourceLocation.
+CodeRegion::CodeRegion(const std::string &Name, const std::string &FuncName,
+                       const CodeRegionType &Type,
+                       const SourceLocation &Location,
+                       const DynamicOptions DO) {
+  this->Name = generateName(Name);
+  this->FuncName = generateName(FuncName);
+  this->Type = Type;
+  this->StringType = getTypeAsString(Type);
+  this->Location = Location;
+  this->AutoTunerOptions = DO;
+}
+
+// Construct a CodeRegion that additionally records the pass name and the
+// invocation number.
+CodeRegion::CodeRegion(const std::string &Name, const std::string &FuncName,
+                       const std::string &PassName, const CodeRegionType &Type,
+                       const SourceLocation &Location,
+                       const unsigned int Invocation)
+    : CodeRegion(Name, FuncName, Type, Location) {
+  this->PassName = generateName(PassName);
+  this->Invocation = Invocation;
+}
+
+// Two code regions are equal when their identifying fields match. Which
+// fields take part depends on OmitAutotuningMetadata,
+// CodeRegionMatchingWithHash and on whether the engine is parsing input
+// and/or generating output.
+bool CodeRegion::operator==(const CodeRegion &RHS) const {
+  bool IsEqual = false;
+  if (OmitAutotuningMetadata)
+    IsEqual = (this->getHash() == RHS.getHash()) &&
+              (this->Type == RHS.getType()) &&
+              (this->PassName == RHS.getPassName());
+  else {
+    IsEqual = (this->Type == RHS.getType()) &&
+              (this->Name == RHS.getName()) &&
+              (this->PassName == RHS.getPassName()) &&
+              (this->FuncName == RHS.getFuncName()) &&
+              (this->Location == RHS.getSourceLoc());
+    if (CodeRegionMatchingWithHash)
+      IsEqual = IsEqual && (this->getHash() == RHS.getHash());
+  }
+
+  if (autotuning::Engine.ParseInput)
+    IsEqual = IsEqual && this->getInvocation() == RHS.getInvocation();
+
+  if (autotuning::Engine.GenerateOutput)
+    IsEqual = IsEqual && this->getBaselineConfig() == RHS.getBaselineConfig();
+
+  return IsEqual;
+}
+
+// Map a CodeRegionType to the string used for it in remarks; unlisted types
+// map to "other".
+std::string CodeRegion::getTypeAsString(CodeRegionType CRType) {
+  switch (CRType) {
+  case autotuning::CodeRegionType::MachineBasicBlock:
+    return "machine_basic_block";
+  case autotuning::CodeRegionType::Loop:
+    return "loop";
+  case autotuning::CodeRegionType::Function:
+    return "function";
+  case autotuning::CodeRegionType::CallSite:
+    return "callsite";
+  case autotuning::CodeRegionType::LLVMParam:
+    return "llvm-param";
+  case autotuning::CodeRegionType::ProgramParam:
+    return "program-param";
+  case autotuning::CodeRegionType::Switch:
+    return "switch";
+  default:
+    return "other";
+  }
+}
+
+// Map a HotnessType to "cold", "hot" or "unknown".
+std::string CodeRegion::getHotnessAsString(HotnessType Hotness) {
+  switch (Hotness) {
+  case autotuning::HotnessType::Cold:
+    return "cold";
+  case autotuning::HotnessType::Hot:
+    return "hot";
+  default:
+    return "unknown";
+  }
+}
+
+void CodeRegion::setPassName(const std::string &NewPassName) {
+  this->PassName = generateName(NewPassName);
+}
+
+/* static */
+autotuning::CodeRegion CodeRegion::getInvalidInstance() {
+  static autotuning::CodeRegion Invalid =
+      CodeRegion(autotuning::CodeRegionType::Invalid);
+  return Invalid;
+}
+
+/* static */
+autotuning::CodeRegion CodeRegion::getEmptyInstance() {
+  static autotuning::CodeRegion Empty =
+      CodeRegion(autotuning::CodeRegionType::Empty);
+  return Empty;
+}
+
+//===----------------------------------------------------------------------===//
+// Container implementation
+//
+
+const CodeRegion &Container::getCodeRegion() const { return CR; }
+
+void Container::setCodeRegion(const CodeRegion &NewCR) { this->CR = NewCR; }
+
+// Look up the parameter ParamsName for this container's code region in the
+// engine's ParamTable. Returns true and sets Value when it is found.
+template
+bool Container::lookUpParams(const std::string &ParamsName, T &Value) const {
+  bool Found = false;
+  auto ConfigMapIterator = Engine.ParamTable.find(CR);
+  if (ConfigMapIterator != Engine.ParamTable.end()) {
+    // Bind by reference: the table entry is only read, so copying the whole
+    // ParameterManager on every lookup is unnecessary.
+    const ParameterManager &InputParams = ConfigMapIterator->second;
+    Found = InputParams.findByName(ParamsName, Value);
+    if (Found) {
+      LLVM_DEBUG(dbgs() << ParamsName << " is set for the CodeRegion: \n"
+                        << " Name: " << CR.getName() << "\n"
+                        << " FuncName: " << CR.getFuncName() << "\n"
+                        << " PassName: " << CR.getPassName() << "\n"
+                        << " Type: " << CR.getTypeAsString() << "\n"
+                        << " Hash: " << CR.getHash() << "\n"
+                        << "\n");
+    }
+  }
+  return Found;
+}
+
+// Decide whether IR has to be dumped for this container's code region. The
+// region is searched in TuningOpps when the config ID equals -1 and in
+// ParamTable otherwise. With IsFunctionIR set, regions are compared by file
+// and function name; otherwise by source location.
+bool Container::requiresIRDump(bool IsFunctionIR) const {
+  auto findBaselineRegion = [&]() -> bool {
+    for (auto &entry : Engine.TuningOpps)
+      if (!IsFunctionIR) {
+        if (CR.getSourceLoc() == entry.getSourceLoc())
+          return true;
+      } else {
+        if (CR.getFileName() == entry.getFileName() &&
+            CR.getFuncName() == entry.getFuncName())
+          return true;
+      }
+    return false;
+  };
+  auto findNonBaselineRegion = [&]() {
+    for (auto &entry : Engine.ParamTable)
+      if (!IsFunctionIR) {
+        if (CR.getSourceLoc() == entry.first.getSourceLoc())
+          return true;
+      } else {
+        if (CR.getFileName() == entry.first.getFileName() &&
+            CR.getFuncName() == entry.first.getFuncName())
+          return true;
+      }
+    return false;
+  };
+
+  if (CFGNumber == -1)
+    return findBaselineRegion();
+  else
+    return findNonBaselineRegion();
+}
+
+template bool Container::lookUpParams(const std::string &ParamsName,
+                                      int &Value) const;
+template bool Container::lookUpParams(const std::string &ParamsName,
+                                      bool &Value) const;
+template bool
+Container::lookUpParams(const std::string &ParamsName,
+                        std::string &Value) const;
+template bool Container::lookUpParams>(
+    const std::string &ParamsName, std::vector &Value) const;
+
+// Count the entries of CallSiteLocs that have the same caller and callee as
+// Loc. Both arguments are taken by const reference to avoid copying the
+// vector on every call.
+static unsigned int count(const SmallVector &CallSiteLocs,
+                          const CallSiteLocation &Loc) {
+  unsigned int Count = 0;
+  for (unsigned int Idx = 0; Idx < CallSiteLocs.size(); ++Idx) {
+    if (Loc.Caller == CallSiteLocs[Idx].Caller &&
+        Loc.Callee == CallSiteLocs[Idx].Callee)
+      Count++;
+  }
+  return Count;
+}
+
+bool AutoTuningEngine::isThinLTOTuning() const { return ThinLTOTuning; }
+
+// Translate a pass name into the CodeRegionType registered for it in PTTMap.
+// An unregistered pass name is treated as unreachable.
+CodeRegionType AutoTuningEngine::convertPassToType(std::string PassName) {
+  auto Search = PTTMap.find(PassName);
+  if (Search == PTTMap.end())
+    llvm_unreachable(
+        "AutoTuningEngine: Invalid/unsupported optimization pass provided.\n");
+  return Search->second;
+}
+
+void AutoTuningEngine::insertCallSiteLoc(CallSiteLocation Loc) {
+  CallSiteLocs.emplace_back(Loc);
+}
+
+// If a function has multiple calls to same callee, then insert all the calls in
+// the CallSiteLocs vector which get available due to inlining of such calls.
+// It will use "Original Call Line No + New Call Line No" instead of using
+// "DebugLoc Line No".
+void AutoTuningEngine::updateCallSiteLocs(llvm::CallBase *OldCB,
+                                          llvm::CallBase *NewCB,
+                                          llvm::Function *Callee,
+                                          unsigned int Line) {
+  for (unsigned int Idx = 0; Idx < CallSiteLocs.size(); ++Idx) {
+    if (OldCB == CallSiteLocs[Idx].CB) {
+      // Copy the entry before appending; emplace_back may reallocate.
+      CallSiteLocation Loc = CallSiteLocs[Idx];
+      Loc.CB = NewCB;
+      Loc.Callee = Callee;
+      Loc.SrcLoc.SourceLine = Loc.SrcLoc.SourceLine + Line;
+      CallSiteLocs.emplace_back(Loc);
+      break;
+    }
+  }
+}
+
+// Drop every recorded call site whose (caller, callee) pair occurs only once.
+// Idx only advances when the current entry is kept, because erase() shifts the
+// following entries down.
+void AutoTuningEngine::cleanCallSiteLoc() {
+  unsigned int Size = CallSiteLocs.size();
+  unsigned int Idx = 0;
+  for (unsigned int I = 0; I < Size; ++I) {
+    CallSiteLocation Loc = CallSiteLocs[Idx];
+    unsigned int Count = count(CallSiteLocs, Loc);
+    if (Count == 1) {
+      CallSiteLocs.erase(CallSiteLocs.begin() + Idx);
+      continue;
+    }
+    Idx++;
+  }
+}
+
+void AutoTuningEngine::clearCallSiteLocs() { CallSiteLocs.clear(); }
+
+// Return the recorded source line for the call CB, or std::nullopt when CB
+// has no entry in CallSiteLocs.
+std::optional
+AutoTuningEngine::getCallSiteLoc(llvm::CallBase *CB) {
+  for (unsigned int Idx = 0; Idx < CallSiteLocs.size(); ++Idx) {
+    if (CB == CallSiteLocs[Idx].CB)
+      return CallSiteLocs[Idx].SrcLoc.SourceLine;
+  }
+  return std::nullopt;
+}
+
+// Record OppCR (with its baseline configuration) as a tuning opportunity.
+// Code regions that were not initialized by initContainer are ignored.
+void AutoTuningEngine::addOpportunity(
+    const CodeRegion &OppCR,
+    std::map BaselineConfig) {
+  if (!OppCR.Initialized)
+    return;
+
+  OppCR.setBaselineConfig(BaselineConfig);
+  if (!TuningOpps.contains(OppCR))
+    TuningOpps.insert(OppCR);
+  else if (OppCR.getHotness() != Unknown) {
+    // If OppCR already exists in TuningOpps with unknown hotness,
+    // then update it if the current hotness is hot/cold.
+    auto OppI = find(TuningOpps, OppCR);
+    if (OppI->getHotness() == Unknown)
+      OppI->setHotness(OppCR.getHotness());
+  }
+}
+
+// Replace CRs by the subset that passes the type, size and hotness filters,
+// then add the module-level regions selected by -auto-tuning-type-filter.
+void AutoTuningEngine::applyOppFilters(CodeRegions &CRs) {
+  CodeRegions NewCRs;
+  for (CodeRegion CR : CRs) {
+    if (AutotuningOutputFilter.getNumOccurrences() > 0) {
+      bool IsMatched = false;
+      for (auto CRType : AutotuningOutputFilter) {
+        if (CRType == CR.getType()) {
+          IsMatched = true;
+          break;
+        }
+      }
+      // Filter out the CodeRegion if its type fails to match any types
+      // specified from the command line.
+      if (!IsMatched)
+        continue;
+    }
+    if (SizeThreshold.getNumOccurrences() > 0 && CR.getSize() < SizeThreshold)
+      continue;
+    if (ExcludeColdCodeRegion && CR.isCold()) {
+      LLVM_DEBUG(dbgs() << "Skip CodeRegion with cold function "
+                        << CR.getFuncName() << "\n");
+      continue;
+    }
+    if (HotCodeRegionOnly && !CR.isHot()) {
+      LLVM_DEBUG(dbgs() << "Skip CodeRegion with " << CR.getHotnessAsString()
+                        << " function " << CR.getFuncName() << "\n");
+      continue;
+    }
+    NewCRs.insert(CR);
+    LLVM_DEBUG(dbgs() << "CodeRegion added as an tuning opportunity: \n"
+                      << " Name: " << CR.getName() << "\n"
+                      << " FuncName: " << CR.getFuncName() << "\n"
+                      << " PassName: " << CR.getPassName() << "\n"
+                      << " Type: " << CR.getTypeAsString() << "\n"
+                      << " Size: " << CR.getSize() << "\n"
+                      << " Hotness: " << CR.getHotnessAsString() << "\n"
+                      << " Hash: " << CR.getHash() << "\n"
+                      << " Location: " << CR.getSourceLoc().SourceFilePath
+                      << "; " << CR.getSourceLoc().SourceLine << "; "
+                      << CR.getSourceLoc().SourceColumn << "\n\n");
+  }
+  if (AutotuningOutputFilter.getNumOccurrences() == 0 ||
+      std::find(AutotuningOutputFilter.begin(), AutotuningOutputFilter.end(),
+                Other) != AutotuningOutputFilter.end()) {
+    // Add an empty CodeRegion with ModuleID as an tuning opportunity.
+    // It could be used to represent a module level code region.
+    autotuning::CodeRegion GlobalCR =
+        CodeRegion(ModuleID, "none", "all", Other);
+    GlobalCR.setHash(llvm::hash_combine(ModuleID, Other));
+    NewCRs.insert(GlobalCR);
+    LLVM_DEBUG(dbgs() << "Module added as an tuning opportunity: \n"
+                      << " Name: " << GlobalCR.getName() << "\n"
+                      << " Hash: " << GlobalCR.getHash() << "\n"
+                      << "\n");
+  }
+
+  // Include LLVMParam as an tuning opportunity only if it is specified with
+  // -auto-tuning-type-filter.
+  if (std::find(AutotuningOutputFilter.begin(), AutotuningOutputFilter.end(),
+                LLVMParam) != AutotuningOutputFilter.end())
+    NewCRs.insert(CodeRegion(ModuleID, "none", "none", LLVMParam));
+
+  if (std::find(AutotuningOutputFilter.begin(), AutotuningOutputFilter.end(),
+                ProgramParam) != AutotuningOutputFilter.end())
+    NewCRs.insert(CodeRegion(ModuleID, "none", "none", ProgramParam));
+
+  CRs = NewCRs;
+}
+
+// Return true when FuncName may be tuned: either no function filter was
+// given, or FuncName is one of the names listed in it.
+bool AutoTuningEngine::applyFunctionFilter(std::string FuncName) {
+  if (AutotuningFunctionFilter.getNumOccurrences() == 0)
+    return true;
+
+  // Iterate by const reference; the filter entries are only compared.
+  for (const std::string &FunctionFilter : AutotuningFunctionFilter)
+    if (FuncName == FunctionFilter)
+      return true;
+
+  return false;
+}
+
+// (Re)initialize the code region of Container for the pass PassName and, when
+// AddOpportunity is set, record it as a tuning opportunity. Does nothing when
+// the engine is disabled or the region is rejected by a filter.
+void AutoTuningEngine::initContainer(Container *Container,
+                                     const std::string &PassName,
+                                     const StringRef FuncName,
+                                     bool AddOpportunity,
+                                     unsigned int Invocation) {
+  if (Enabled) {
+    if (!isTuningAllowedForType(convertPassToType(PassName)) &&
+        !(isGenerateOutput() &&
+          AutotuningOutputFilter.getNumOccurrences() == 0))
+      return;
+
+    if (!applyFunctionFilter(FuncName.str()))
+      return;
+
+    // The attributes of a Container could potentially change overtime even with
+    // the same pass if the associated pass is invoked multiple times at
+    // different places in the pipeline. Therefore, we need to initCodeRegion
+    // every time when this function is called to ensure the CodeRegion with the
+    // latest information will be added as tuning opportunities.
+    Container->initCodeRegion();
+    if (Container->CR.getType() == autotuning::CodeRegionType::Invalid)
+      return;
+
+    uint64_t hash = Container->computeStructuralHash();
+    CodeRegion &OppCR = Container->CR;
+    if (GenerateOutput) {
+      if (OppCR.getSize() < SizeThreshold)
+        return;
+      if (ExcludeColdCodeRegion && OppCR.isCold()) {
+        LLVM_DEBUG(dbgs() << "Skip CodeRegion with cold function "
+                          << OppCR.getFuncName() << "\n");
+        return;
+      }
+      if (HotCodeRegionOnly && !OppCR.isHot()) {
+        LLVM_DEBUG(dbgs() << "Skip CodeRegion with "
+                          << OppCR.getHotnessAsString() << " function "
+                          << OppCR.getFuncName() << "\n");
+        return;
+      }
+    }
+    OppCR.setPassName(PassName);
+    OppCR.setHash(hash);
+    OppCR.setInvocation(Invocation);
+    OppCR.Initialized = true;
+    if (AddOpportunity)
+      addOpportunity(OppCR);
+  }
+}
+
+// Return whether Pass is listed in OppPassList for the file Filename.
+bool AutoTuningEngine::shouldRunOptPass(std::string Filename,
+                                        std::string Pass) {
+  return OppPassList.count(Filename) ? OppPassList[Filename].count(Pass)
+                                     : false;
+}
+
+// Initialize the engine for the module Module: decide whether input is parsed
+// and/or output is generated, compute the module ID, read the tuning input
+// and validate the incremental compilation mode.
+Error AutoTuningEngine::init(const std::string &Module) {
+  ParseInput = false;
+  if (std::optional MaybePath =
+          llvm::sys::Process::GetEnv("AUTOTUNE_INPUT")) {
+    InputFile = *MaybePath;
+    ParseInput = true;
+  } else if (InputFile.getNumOccurrences() > 0) {
+    ParseInput = true;
+  }
+
+  GenerateOutput = false;
+  if (OutputOppDir.getNumOccurrences() > 0)
+    GenerateOutput = true;
+
+  // Invocation of any of the following command line options
+  // (auto-tuning-input and auto-tuning-opp) or env variable
+  // AUTOTUNE_INPUT can enable auto-tuning mode.
+  if (ParseInput || GenerateOutput) {
+    Enabled = true;
+    // Generate absolute path and remove the base directory (if available).
+    // A relative path will be used as (coarse-grain) code region name.
+    llvm::SmallString<128> ModuleVec = StringRef(Module);
+    llvm::sys::fs::make_absolute(ModuleVec);
+    if (ProjectDir.size() && ModuleVec.startswith(ProjectDir))
+      ModuleID = ModuleVec.substr(ProjectDir.size()).str();
+    else
+      ModuleID = std::string(ModuleVec);
+  }
+
+  // Initialization of map to be used for pass-name to CodeRegionType
+  // conversion.
+  PTTMap = {{"loop-unroll", Loop},
+            {"loop-vectorize", Loop},
+            {"inline", CallSite},
+            {"machine-scheduler", MachineBasicBlock},
+            {"switch-lowering", Switch},
+            {"autotuning-dump", Function}};
+
+  if (ParseInput) {
+    // Currently we only support yaml format for input.
+    if (Error E = AutoTuningRemarkManager::read(*this, InputFile, "yaml")) {
+      errs() << "Error parsing auto-tuning input.\n";
+      return E;
+    } else {
+      LLVM_DEBUG(dbgs() << "AutoTuningEngine is initialized.\n"
+                        << " Size of ParamTable: " << this->ParamTable.size()
+                        << "\n");
+      if (LLVMParams.size())
+        LLVM_DEBUG(dbgs() << "AutoTuner: LLVMParams applied.\n");
+      if (ProgramParams.size())
+        LLVM_DEBUG(dbgs() << "AutoTuner: ProgramParams applied.\n");
+    }
+  }
+
+  for (auto CRType : AutotuningOutputFilter)
+    CodeRegionFilterTypes.insert(CRType);
+
+  if (GenerateOutput) {
+    switch (AutoTuningCompileMode) {
+    case CoarseGrain: {
+      // Coarse-grain mode is only valid when the type filter is given and
+      // lists nothing but LLVMParam.
+      bool Valid = false;
+      if (AutotuningOutputFilter.getNumOccurrences() > 0) {
+        Valid = true;
+        for (auto CRType : AutotuningOutputFilter)
+          if (CRType != LLVMParam) {
+            Valid = false;
+            break;
+          }
+      }
+      if (!Valid) {
+        AutoTuningCompileMode = Inactive;
+        errs() << "AutoTunerCompile: Code region type filtering does not match"
+                  " with incremental compilation option.\n"
+                  "Disabling incremental compilation.\n";
+      }
+      break;
+    }
+    case FineGrain: {
+      // Fine-grain mode is only valid when the type filter is given and
+      // lists nothing but Loop, CallSite and Function.
+      bool Valid = false;
+      if (AutotuningOutputFilter.getNumOccurrences() > 0) {
+        Valid = true;
+        for (auto CRType : AutotuningOutputFilter) {
+          if (CRType != Loop && CRType != CallSite && CRType != Function) {
+            Valid = false;
+            break;
+          }
+        }
+      }
+      if (!Valid) {
+        AutoTuningCompileMode = Inactive;
+        errs() << "AutoTunerCompile: Code region type filtering does not match"
+                  " with incremental compilation option.\n"
+                  "Disabling incremental compilation.\n";
+      }
+      break;
+    }
+    case Basic:
+    case Inactive:
+      break;
+    default:
+      llvm_unreachable("AutoTuningCompile: Unknown AutoTuner Incremental "
+                       "Compilation mode.\n");
+    }
+  }
+
+  MLEnabled = (CFGNumber.getNumOccurrences() > 0);
+  if (EnableAutoTuningDump || MLEnabled)
+    DumpEnabled = true;
+  return Error::success();
+}
+
+// Return the configuration ID given with -auto-tuning-config-id, or an error
+// when ML guidance is not enabled.
+llvm::Expected AutoTuningEngine::getConfigNumber() {
+  if (!isMLEnabled()) {
+    std::string errorMsg =
+        "No Autotuner configuration specified; ML guidance is unavailable.";
+    return createStringError(inconvertibleErrorCode(), errorMsg);
+  } else
+    return CFGNumber;
+}
+
+// When an output directory was given, filter the recorded tuning
+// opportunities, dump them through the remark manager and clear them.
+Error AutoTuningEngine::finalize() {
+  if (OutputOppDir.getNumOccurrences() > 0) {
+    // Apply filters.
+    applyOppFilters(TuningOpps);
+    if (!TuningOpps.empty()) {
+      if (Error E = AutoTuningRemarkManager::dump(
+              *this, OutputOppDir, OutputFormat, RemarksPasses)) {
+        errs() << "Error generating auto-tuning opportunities.\n";
+        return E;
+      }
+    }
+
+    // Clear the recorded tuning opportunities when ending the auto-tuning
+    // in case of redundant information
+    TuningOpps.clear();
+  }
+  return Error::success();
+}
+
+// Look up the module-level parameter ParamsName in GlobalParams. Returns true
+// and sets Value when it is found.
+template
+bool AutoTuningEngine::lookUpGlobalParams(const std::string &ParamsName,
+                                          T &Value) const {
+  bool Found = GlobalParams.findByName(ParamsName, Value);
+  if (Found) {
+    LLVM_DEBUG(dbgs() << "Global Variable " << ParamsName << " is set.\n");
+  }
+  return Found;
+}
+
+template bool
+AutoTuningEngine::lookUpGlobalParams(const std::string &ParamsName,
+                                     int &Value) const;
+template bool
+AutoTuningEngine::lookUpGlobalParams(const std::string &ParamsName,
+                                     bool &Value) const;
+template bool
+AutoTuningEngine::lookUpGlobalParams(const std::string &ParamsName,
+                                     std::string &Value) const;
+template bool AutoTuningEngine::lookUpGlobalParams>(
+    const std::string &ParamsName, std::vector &Value) const;
+
+class AutoTuningEngine Engine;
+
+} // namespace autotuning
+
+#endif
diff --git a/llvm/lib/AutoTuner/AutoTuningRemarkManager.cpp b/llvm/lib/AutoTuner/AutoTuningRemarkManager.cpp
new file mode 100644
index 0000000000000000000000000000000000000000..3e0506e534c44cff3422c0e4e8f510ad18093f24
--- /dev/null
+++ b/llvm/lib/AutoTuner/AutoTuningRemarkManager.cpp
@@ -0,0 +1,299 @@
+#if defined(ENABLE_AUTOTUNER)
+//===- llvm/AutoTuner/AutoTuningRemarkManager.cpp - Remark Manager --------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains the implementation for inputting and outputting remarks
+// for AutoTuning.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/AutoTuner/AutoTuningRemarkManager.h"
+#include "llvm/ADT/StringRef.h"
+#include "llvm/AutoTuner/AutoTuning.h"
+#include "llvm/AutoTuner/AutoTuningRemarkStreamer.h"
+#include "llvm/IR/DebugInfoMetadata.h"
+#include "llvm/IR/LLVMRemarkStreamer.h"
+#include "llvm/Remarks/Remark.h"
+#include "llvm/Remarks/RemarkFormat.h"
+#include "llvm/Remarks/RemarkParser.h"
+#include "llvm/Remarks/RemarkSerializer.h"
+#include "llvm/Remarks/RemarkStreamer.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/FileSystem.h"
+#include "llvm/Support/MemoryBuffer.h"
+#include "llvm/Support/Path.h"
+#include "llvm/Support/ToolOutputFile.h"
+
+// Enable debug messages for AutoTuner.
+#define DEBUG_TYPE "autotuning"
+
+using namespace llvm;
+using namespace autotuning;
+
+// Helper functions.
+namespace {
+// Convert string into CodeRegionType.
+Expected StringToCodeRegionType(const std::string &CRType) { + if (CRType == "machine_basic_block") + return autotuning::CodeRegionType::MachineBasicBlock; + else if (CRType == "loop") + return autotuning::CodeRegionType::Loop; + else if (CRType == "function") + return autotuning::CodeRegionType::Function; + else if (CRType == "callsite") + return autotuning::CodeRegionType::CallSite; + else if (CRType == "llvm-param") + return autotuning::CodeRegionType::LLVMParam; + else if (CRType == "program-param") + return autotuning::CodeRegionType::ProgramParam; + else if (CRType == "switch") + return autotuning::CodeRegionType::Switch; + else if (CRType == "other") + return autotuning::CodeRegionType::Other; + else + return make_error("Unsupported CodeRegionType:" + CRType, + inconvertibleErrorCode()); +} + +// Remark -> autotuning::ParameterManager +ParameterManager RemarkToParameterManager(const remarks::Remark &Remark) { + // Create Parameters from a remark. + ParameterManager ParamManager; + for (const remarks::Argument &Arg : Remark.Args) { + int Value = 0; + if (!Arg.Val.getAsInteger(10, Value)) + // If no errors + ParamManager.add(Arg.Key.str(), Value); + else if (Arg.Val == "true") + ParamManager.add(Arg.Key.str(), true); + else if (Arg.Val == "false") + ParamManager.add(Arg.Key.str(), false); + // If there is a value of vector type + else if (Arg.VectorVal) { + std::vector Strings; + for (const StringRef &Val : *Arg.VectorVal) { + Strings.push_back(Val.str()); + } + ParamManager.add(Arg.Key.str(), Strings); + } else + // Add as String Value + ParamManager.add(Arg.Key.str(), Arg.Val); + } + + return ParamManager; +} + +// Remark -> std::unordered_map +std::unordered_map +RemarkToStringMap(const remarks::Remark &Remark) { + std::unordered_map LLVMParams; + for (const remarks::Argument &Arg : Remark.Args) { + // Add as String Value + LLVMParams[Arg.Key.str()] = Arg.Val.str(); + } + return LLVMParams; +} + +// Remark -> autotuning::SourceLocation +SourceLocation 
RemarkToSourceLocation(const remarks::Remark &Remark) { + SourceLocation Location; + if (Remark.Loc) { + StringRef File = Remark.Loc->SourceFilePath; + unsigned Line = Remark.Loc->SourceLine; + unsigned Column = Remark.Loc->SourceColumn; + Location = {File.str(), Line, Column}; + } + return Location; +} + +// Remark -> autotuning::CodeRegion +CodeRegion RemarkToCodeRegion(const remarks::Remark &Remark, + Expected &Type) { + // Create a SourceLocation from a remark. + SourceLocation Location = RemarkToSourceLocation(Remark); + // Create a CodeRegion from a remark. + CodeRegion CR = CodeRegion(Remark.RemarkName.str(), Remark.FunctionName.str(), + Remark.PassName.str(), Type.get(), Location); + if (Remark.CodeRegionHash) + CR.setHash(Remark.CodeRegionHash.value_or(0)); + if (Remark.Invocation) + CR.setInvocation(Remark.Invocation.value_or(0)); + + return CR; +} + +Expected> emitAutoTuningRemarks( + const StringRef RemarksFilename, const StringRef RemarksFormat, + const StringRef RemarksPasses, const CodeRegions &CRList) { + if (RemarksFilename.empty()) + return nullptr; + // Parse remark format. Options are yaml, yaml-strtab and bitstream. + Expected Format = remarks::parseFormat(RemarksFormat); + if (Error E = Format.takeError()) + return make_error(std::move(E)); + + std::error_code EC; + auto Flags = + *Format == remarks::Format::YAML ? sys::fs::OF_Text : sys::fs::OF_None; + auto RemarksFile = + std::make_unique(RemarksFilename, EC, Flags); + if (EC) + return make_error(errorCodeToError(EC)); + // Create a remark serializer to emit code regions. + Expected> RemarkSerializer = + remarks::createRemarkSerializer( + *Format, remarks::SerializerMode::Separate, RemarksFile->os()); + + if (Error E = RemarkSerializer.takeError()) + return make_error(std::move(E)); + // Create remark streamer based on the serializer. 
+ remarks::RemarkStreamer RStreamer = + remarks::RemarkStreamer(std::move(*RemarkSerializer), RemarksFilename); + AutoTuningRemarkStreamer Streamer(RStreamer); + + if (!RemarksPasses.empty()) + if (Error E = Streamer.setFilter(RemarksPasses)) + return make_error(std::move(E)); + // Emit CodeRegions in Remark format. + for (const CodeRegion &CR : CRList) { + Streamer.emit(CR); + } + return std::move(RemarksFile); +} +} // namespace + +llvm::Error AutoTuningRemarkManager::read(AutoTuningEngine &E, + const std::string &InputFileName, + const std::string &RemarksFormat) { + ErrorOr> Buf = + MemoryBuffer::getFile(InputFileName.c_str()); + if (std::error_code EC = Buf.getError()) + return make_error( + "Can't open file " + InputFileName + ": " + EC.message(), EC); + // Parse remark format. Options are yaml, yaml-strtab and bitstream. + Expected Format = remarks::parseFormat(RemarksFormat); + if (!Format) + return Format.takeError(); + + Expected> MaybeParser = + remarks::createRemarkParserFromMeta(*Format, (*Buf)->getBuffer()); + if (!MaybeParser) { + return MaybeParser.takeError(); + } + remarks::RemarkParser &Parser = **MaybeParser; + + while (true) { + Expected> MaybeRemark = Parser.next(); + if (!MaybeRemark) { + Error E = MaybeRemark.takeError(); + if (E.isA()) { + // EOF. + consumeError(std::move(E)); + break; + } + return E; + } + const remarks::Remark &Remark = **MaybeRemark; + + if (Remark.RemarkType != remarks::Type::AutoTuning) + continue; + + if (!Remark.CodeRegionType) + return make_error("CodeRegionType field is missing.", + inconvertibleErrorCode()); + Expected Type = + StringToCodeRegionType((*Remark.CodeRegionType).str()); + if (!Type) + return Type.takeError(); + CodeRegionType CRType = Type.get(); + // If CodeRegionType is Other, this remark corresponds to global + // parameters, and no need to create a CodeRegion object. Check if the + // Remark of global parameters is for the current Module. 
+ if (CRType == autotuning::Other && Remark.RemarkName == Engine.ModuleID) { + Engine.GlobalParams = RemarkToParameterManager(Remark); + continue; + } + if (CRType == autotuning::LLVMParam && + Remark.RemarkName == Engine.ModuleID) { + Engine.LLVMParams = RemarkToStringMap(Remark); + continue; + } + if (CRType == autotuning::ProgramParam && + Remark.RemarkName == Engine.ModuleID) { + Engine.ProgramParams = RemarkToStringMap(Remark); + continue; + } + if (Engine.isThinLTOTuning() && + (CRType == autotuning::CallSite || CRType == autotuning::Loop || + CRType == autotuning::MachineBasicBlock || + CRType == autotuning::Function)) { + LLVM_DEBUG(dbgs() << "AutoTuner does not support tuning of " + << CodeRegion::getTypeAsString(CRType) + << " for thinLTO durning link-time optimization. " + "Ignoring current code region.\n"); + continue; + } + // Create a CodeRegion from a remark. + CodeRegion CR = RemarkToCodeRegion(Remark, Type); + ParameterManager ParamManager = RemarkToParameterManager(Remark); + // Add the CodeRegion-ParameterManager entry into LookUpTable. + Engine.ParamTable[CR] = ParamManager; + + std::string Filename = CR.getSourceLoc().SourceFilePath; + size_t Pos = Filename.rfind("."); + if (Pos != std::string::npos) + Filename.erase(Pos, Filename.size()); + Engine.OppPassList[Filename].insert(CR.getPassName()); + Engine.CodeRegionFilterTypes.insert(CR.getType()); + } + return Error::success(); +} + +Error AutoTuningRemarkManager::dump(const autotuning::AutoTuningEngine &E, + const std::string &DirName, + const std::string &RemarksFormat, + const std::string &RemarksPasses) { + // Change to absolute path. + SmallString<256> OutputPath = StringRef(DirName); + sys::fs::make_absolute(OutputPath); + + // Make sure the new output directory exists, creating it if necessary. 
+ if (std::error_code EC = sys::fs::create_directories(OutputPath)) { + return make_error("could not create directory: " + + Twine(OutputPath) + ": " + EC.message(), + EC); + } + if (!Engine.TuningOpps.empty()) { + StringRef ModelFileName = sys::path::filename(Engine.ModuleID); + sys::path::append(OutputPath, ModelFileName + "." + RemarksFormat); + + int i = 1; // Output file suffix starts from 1. + // Check all existing xml files xml.1...i and create a new file + // suffix.(i+1). + while (sys::fs::exists(OutputPath)) { + sys::path::remove_filename(OutputPath); + sys::path::append(OutputPath, + ModelFileName + "." + RemarksFormat + "." + Twine(i)); + i += 1; + } + Expected> RemarksFileOrErr = + emitAutoTuningRemarks(OutputPath, RemarksFormat, RemarksPasses, + Engine.TuningOpps); + if (Error E = RemarksFileOrErr.takeError()) { + return E; + } + + std::unique_ptr RemarksFile = std::move(*RemarksFileOrErr); + if (RemarksFile) + RemarksFile->keep(); + } + return Error::success(); +} + +#endif diff --git a/llvm/lib/AutoTuner/AutoTuningRemarkStreamer.cpp b/llvm/lib/AutoTuner/AutoTuningRemarkStreamer.cpp new file mode 100644 index 0000000000000000000000000000000000000000..0516c055a139ada1fcefc638729c3eb266fe699a --- /dev/null +++ b/llvm/lib/AutoTuner/AutoTuningRemarkStreamer.cpp @@ -0,0 +1,55 @@ +#if defined(ENABLE_AUTOTUNER) +// ===---------- llvm/AutoTuner/AutoTuningRemarkStreamer.cpp --------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +// Copyright (C) 2017-2022, Huawei Technologies Co., Ltd. All rights reserved. +// +// ===---------------------------------------------------------------------===// +// +// This file contains the implementation of the conversion between AutoTuner +// CodeRegions and serializable remarks::Remark objects. 
+// +// ===---------------------------------------------------------------------===// + +#include "llvm/AutoTuner/AutoTuningRemarkStreamer.h" + +using namespace llvm; + +// autotuning::CodeRegion -> Remark +remarks::Remark +AutoTuningRemarkStreamer::toRemark(const autotuning::CodeRegion &CR) { + remarks::Remark R; // The result. + R.RemarkType = remarks::Type::AutoTuning; + R.PassName = CR.getPassName(); + R.RemarkName = CR.getName(); + R.FunctionName = CR.getFuncName(); + const autotuning::SourceLocation &Location = CR.getSourceLoc(); + if (Location) + R.Loc = remarks::RemarkLocation{Location.SourceFilePath, + Location.SourceLine, Location.SourceColumn}; + R.CodeRegionType = CR.getTypeAsString(); + R.CodeRegionHash = CR.getHash(); + R.AutoTunerOptions = CR.getAutoTunerOptions(); + R.Invocation = CR.getInvocation(); + R.BaselineConfig = CR.getBaselineConfig(); + return R; +} + +void AutoTuningRemarkStreamer::emit(const autotuning::CodeRegion &CR) { + if (!RS.matchesFilter(CR.getPassName())) + return; + + // First, convert the code region to a remark. + remarks::Remark R = toRemark(CR); + // Then, emit the remark through the serializer. 
+ RS.getSerializer().emit(R); +} + +Error AutoTuningRemarkStreamer::setFilter(StringRef Filter) { + return RS.setFilter(Filter); +} +#endif diff --git a/llvm/lib/AutoTuner/CMakeLists.txt b/llvm/lib/AutoTuner/CMakeLists.txt new file mode 100644 index 0000000000000000000000000000000000000000..c618474fe5aece1bf862fdf2e8c889c5a789922d --- /dev/null +++ b/llvm/lib/AutoTuner/CMakeLists.txt @@ -0,0 +1,11 @@ +add_llvm_component_library(LLVMAutoTuner + AutoTuning.cpp + AutoTuningRemarkManager.cpp + AutoTuningRemarkStreamer.cpp + + ADDITIONAL_HEADER_DIRS + ${LLVM_MAIN_INCLUDE_DIR}/llvm/AutoTuner + + LINK_COMPONENTS + Remarks +) \ No newline at end of file diff --git a/llvm/lib/CMakeLists.txt b/llvm/lib/CMakeLists.txt index 283baa6090eb9a316b221fe6cb392b205adcf69c..966137c0f71f8a9d291a108c78338dbd18138c82 100644 --- a/llvm/lib/CMakeLists.txt +++ b/llvm/lib/CMakeLists.txt @@ -28,6 +28,7 @@ add_subdirectory(Object) add_subdirectory(ObjectYAML) add_subdirectory(Option) add_subdirectory(Remarks) +add_subdirectory(AutoTuner) add_subdirectory(Debuginfod) add_subdirectory(DebugInfo) add_subdirectory(DWP) diff --git a/llvm/lib/CodeGen/CMakeLists.txt b/llvm/lib/CodeGen/CMakeLists.txt index 106571b9061bebd5aaa8cd446005d5fbf4e86648..9029dc7bb3d9af0a8170838b9f67ca631d3c22fe 100644 --- a/llvm/lib/CodeGen/CMakeLists.txt +++ b/llvm/lib/CodeGen/CMakeLists.txt @@ -273,6 +273,7 @@ add_llvm_component_library(LLVMCodeGen LINK_COMPONENTS Analysis + AutoTuner BitReader BitWriter CodeGenTypes diff --git a/llvm/lib/CodeGen/CalcSpillWeights.cpp b/llvm/lib/CodeGen/CalcSpillWeights.cpp index 5a005ba7b414d6173da2e6fb5cf21673793b644e..c7eb2e2e4f15775a6ee78edd13c90d6e12c88362 100644 --- a/llvm/lib/CodeGen/CalcSpillWeights.cpp +++ b/llvm/lib/CodeGen/CalcSpillWeights.cpp @@ -29,6 +29,22 @@ using namespace llvm; #define DEBUG_TYPE "calcspillweights" +static cl::opt LoopWeight( + "reg-spill-loop-weight", cl::Hidden, + cl::desc( + "Tunable extra weight to what looks like a loop induction variable"), + 
cl::init(3)); + +static cl::opt RemaWeight( + "reg-spill-rematerialize-weight", cl::Hidden, + cl::desc("Tunable reduced weight giving re-materialize oppotunities"), + cl::init(0.5f)); + +static cl::opt + HintWeight("reg-spill-hint-weight", cl::Hidden, + cl::desc("Tunable weakly boost weight of hinted registers"), + cl::init(1.01f)); + void VirtRegAuxInfo::calculateSpillWeightsAndHints() { LLVM_DEBUG(dbgs() << "********** Compute Spill Weights **********\n" << "********** Function: " << MF.getName() << '\n'); @@ -252,7 +268,11 @@ float VirtRegAuxInfo::weightCalcHelper(LiveInterval &LI, SlotIndex *Start, // Give extra weight to what looks like a loop induction variable update. if (Writes && IsExiting && LIS.isLiveOutOfMBB(LI, MBB)) +#if defined(ENABLE_AUTOTUNER) + Weight *= LoopWeight; +#else Weight *= 3; +#endif TotalWeight += Weight; } @@ -288,7 +308,11 @@ float VirtRegAuxInfo::weightCalcHelper(LiveInterval &LI, SlotIndex *Start, } // Weakly boost the spill weight of hinted registers. +#if defined(ENABLE_AUTOTUNER) + TotalWeight *= HintWeight; +#else TotalWeight *= 1.01F; +#endif } // If the live interval was already unspillable, leave it that way. @@ -315,7 +339,11 @@ float VirtRegAuxInfo::weightCalcHelper(LiveInterval &LI, SlotIndex *Start, // FIXME: this gets much more complicated once we support non-trivial // re-materialization. 
if (isRematerializable(LI, LIS, VRM, *MF.getSubtarget().getInstrInfo())) +#if defined(ENABLE_AUTOTUNER) + TotalWeight *= RemaWeight; +#else TotalWeight *= 0.5F; +#endif if (IsLocalSplitArtifact) return normalize(TotalWeight, Start->distance(*End), NumInstr); diff --git a/llvm/lib/CodeGen/MachineBasicBlock.cpp b/llvm/lib/CodeGen/MachineBasicBlock.cpp index 231544494c3286647b468cd595f393c11c3e450f..327cd40f86a468d23b4317c3aa9ca18577480292 100644 --- a/llvm/lib/CodeGen/MachineBasicBlock.cpp +++ b/llvm/lib/CodeGen/MachineBasicBlock.cpp @@ -37,6 +37,9 @@ #include "llvm/Support/raw_ostream.h" #include "llvm/Target/TargetMachine.h" #include +#if defined(ENABLE_AUTOTUNER) +#include "llvm/IR/StructuralHash.h" +#endif #include using namespace llvm; @@ -1703,6 +1706,39 @@ MachineBasicBlock::livein_iterator MachineBasicBlock::livein_begin() const { return LiveIns.begin(); } +#if defined(ENABLE_AUTOTUNER) +uint64_t MachineBasicBlock::computeStructuralHash() { + return StructuralHash(*this); +} + +void MachineBasicBlock::initCodeRegion() { + std::string BasicBlockName = + ("%bb." + Twine(this->getNumber()) + ":" + this->getName()).str(); + MachineFunction *MF = this->getParent(); + StringRef FuncName = MF->getName(); + + autotuning::CodeRegion CR; + if (!this->empty()) { + const DebugLoc &StartLoc = this->front().getDebugLoc(); + CR = autotuning::CodeRegion(BasicBlockName, FuncName.data(), + autotuning::CodeRegionType::MachineBasicBlock, + StartLoc); + } else { + CR = autotuning::CodeRegion(BasicBlockName, FuncName.data(), + autotuning::CodeRegionType::MachineBasicBlock); + } + // Compute the number of non-debug IR instructions in this MBB. + unsigned NumInstrs = std::distance(this->getFirstNonDebugInstr(), + this->getLastNonDebugInstr()); + CR.setSize(NumInstrs); + // Compute hotness. 
+ autotuning::HotnessType Hotness = MF->getFunction().ATEFunction.getHotness(); + CR.setHotness(Hotness); + + this->setCodeRegion(CR); +} +#endif + MachineBasicBlock::liveout_iterator MachineBasicBlock::liveout_begin() const { const MachineFunction &MF = *getParent(); assert(MF.getProperties().hasProperty( diff --git a/llvm/lib/CodeGen/MachineScheduler.cpp b/llvm/lib/CodeGen/MachineScheduler.cpp index ba5432459d1282b8fe827dfec0d6f6f34521b7cb..caccc9e5fad444bde78721c6ba61ee1aea899324 100644 --- a/llvm/lib/CodeGen/MachineScheduler.cpp +++ b/llvm/lib/CodeGen/MachineScheduler.cpp @@ -569,6 +569,12 @@ void MachineSchedulerBase::scheduleRegions(ScheduleDAGInstrs &Scheduler, for (MachineFunction::iterator MBB = MF->begin(), MBBEnd = MF->end(); MBB != MBBEnd; ++MBB) { +#if defined(ENABLE_AUTOTUNER) + // before visiting this MBB + // if AutoTuning is enabled, initialize this MBB for auto-tuning + autotuning::Engine.initContainer(&*MBB, DEBUG_TYPE); +#endif + Scheduler.startBlock(&*MBB); #ifndef NDEBUG @@ -3244,6 +3250,44 @@ void GenericScheduler::initPolicy(MachineBasicBlock::iterator Begin, RegionPolicy.ShouldTrackLaneMasks = false; } +#if defined(ENABLE_AUTOTUNER) + // AUTO-TUNING - Look up for MBB level scheduling direction if AutoTuning is + // enabled + if (autotuning::Engine.isEnabled()) { + MachineBasicBlock &MBB = *Begin->getParent(); + + bool NewForceBottomUp = false; + // Look up from xml file, and overwrite values + bool IsForceBottomUpSet = + MBB.lookUpParams("ForceBottomUp", NewForceBottomUp); + + bool NewForceForceTopDown = false; + bool IsForceTopDownSet = + MBB.lookUpParams("ForceTopDown", NewForceForceTopDown); + + assert((!NewForceBottomUp || !NewForceForceTopDown) && + "BottomUp and TopDown cannot both set to true"); + + if (IsForceBottomUpSet) { + RegionPolicy.OnlyBottomUp = NewForceBottomUp; + if (RegionPolicy.OnlyBottomUp) { + RegionPolicy.OnlyTopDown = false; + } + } + + if (IsForceTopDownSet) { + RegionPolicy.OnlyTopDown = NewForceForceTopDown; + if 
(RegionPolicy.OnlyTopDown) { + RegionPolicy.OnlyBottomUp = false; + } + } + + if (IsForceBottomUpSet || IsForceTopDownSet) { + return; + } + } +#endif + // Check -misched-topdown/bottomup can force or unforce scheduling direction. // e.g. -misched-bottomup=false allows scheduling in both directions. assert((!ForceTopDown || !ForceBottomUp) && diff --git a/llvm/lib/CodeGen/SwitchLoweringUtils.cpp b/llvm/lib/CodeGen/SwitchLoweringUtils.cpp index 36a02d5beb4b240f4dee9739ab61b06084ad858f..8796c1b1f3ecd160876660b5258592e474d63b8b 100644 --- a/llvm/lib/CodeGen/SwitchLoweringUtils.cpp +++ b/llvm/lib/CodeGen/SwitchLoweringUtils.cpp @@ -11,11 +11,15 @@ // //===----------------------------------------------------------------------===// +#include "llvm/AutoTuner/AutoTuning.h" #include "llvm/CodeGen/SwitchLoweringUtils.h" #include "llvm/CodeGen/FunctionLoweringInfo.h" #include "llvm/CodeGen/MachineJumpTableInfo.h" #include "llvm/CodeGen/TargetLowering.h" #include "llvm/Target/TargetMachine.h" +#if defined(ENABLE_AUTOTUNER) +#include "llvm/AutoTuner/AutoTuning.h" +#endif using namespace llvm; using namespace SwitchCG; @@ -61,7 +65,23 @@ void SwitchCG::SwitchLowering::findJumpTables(CaseClusterVector &Clusters, if (!TLI->areJTsAllowed(SI->getParent()->getParent())) return; +#if defined(ENABLE_AUTOTUNER) + unsigned MinJumpTableEntries = TLI->getMinimumJumpTableEntries(); + // Overwrite MinJumpTableEntries when it is set by Autotuner + if (autotuning::Engine.isEnabled()) { + autotuning::Engine.initContainer(SI->ATESwitchInst.get(), + "switch-lowering"); + + int NewValue = 0; // the int value is set by lookUpParams() + bool Changed = + SI->ATESwitchInst->lookUpParams("MinJumpTableEntries", NewValue); + if (Changed) + MinJumpTableEntries = NewValue; + } +#else const unsigned MinJumpTableEntries = TLI->getMinimumJumpTableEntries(); +#endif + const unsigned SmallNumberOfEntries = MinJumpTableEntries / 2; // Bail if not enough cases. 
diff --git a/llvm/lib/IR/AsmWriter.cpp b/llvm/lib/IR/AsmWriter.cpp index df753b91ff903bb0dec6d0082b97bfeef9704186..af77e6c2dc4d55fcf387f85a32eea583f75a3773 100644 --- a/llvm/lib/IR/AsmWriter.cpp +++ b/llvm/lib/IR/AsmWriter.cpp @@ -2602,11 +2602,21 @@ public: void writeAllAttributeGroups(); void printTypeIdentities(); +#if defined(ENABLE_AUTOTUNER) + void printGlobal(const GlobalVariable *GV, bool PrintDeclarationOnly = false); + void printAlias(const GlobalAlias *GA); + void printIFunc(const GlobalIFunc *GI); + void printComdat(const Comdat *C); + void printRequisiteDeclarations(const Function *F); + void printFunction(const Function *F, bool PrintCompleteIR = false, + bool PrintDeclarationOnly = false); +#else void printGlobal(const GlobalVariable *GV); void printAlias(const GlobalAlias *GA); void printIFunc(const GlobalIFunc *GI); void printComdat(const Comdat *C); void printFunction(const Function *F); +#endif void printArgument(const Argument *FA, AttributeSet Attrs); void printBasicBlock(const BasicBlock *BB); void printInstructionLine(const Instruction &I); @@ -3593,15 +3603,26 @@ static void maybePrintComdat(formatted_raw_ostream &Out, Out << ')'; } +#if defined(ENABLE_AUTOTUNER) +void AssemblyWriter::printGlobal(const GlobalVariable *GV, + bool PrintDeclarationOnly) { + if (GV->isMaterializable() && !PrintDeclarationOnly) +#else void AssemblyWriter::printGlobal(const GlobalVariable *GV) { if (GV->isMaterializable()) +#endif Out << "; Materializable\n"; AsmWriterContext WriterCtx(&TypePrinter, &Machine, GV->getParent()); WriteAsOperandInternal(Out, GV, WriterCtx); Out << " = "; +#if defined(ENABLE_AUTOTUNER) + if ((!GV->hasInitializer() || PrintDeclarationOnly) && + GV->hasExternalLinkage()) +#else if (!GV->hasInitializer() && GV->hasExternalLinkage()) +#endif Out << "external "; Out << getLinkageNameWithSpace(GV->getLinkage()); @@ -3619,7 +3640,11 @@ void AssemblyWriter::printGlobal(const GlobalVariable *GV) { Out << (GV->isConstant() ? 
"constant " : "global "); TypePrinter.print(GV->getValueType(), Out); +#if defined(ENABLE_AUTOTUNER) + if (GV->hasInitializer() && !PrintDeclarationOnly) { +#else if (GV->hasInitializer()) { +#endif Out << ' '; writeOperand(GV->getInitializer(), false); } @@ -3769,12 +3794,102 @@ void AssemblyWriter::printTypeIdentities() { } } +#if defined(ENABLE_AUTOTUNER) +/// printRequisiteDeclarations - Print the declarations of type identities, +/// global variables, functions, and function attribute groups of a function. +void AssemblyWriter::printRequisiteDeclarations(const Function *F) { + // walk through instructions and collect global variables & functions + SmallPtrSet GVs; + SmallPtrSet Functions; + for (const BasicBlock &BB : *F) { + for (const Instruction &I : BB) { + // Check for function + if (const auto *CI = dyn_cast(&I)) { + Function *func = CI->getCalledFunction(); + if (func) + Functions.insert(func); + } + // Check for global variables + for (const Use &U : I.operands()) { + if (GlobalVariable *gv = dyn_cast(U)) + GVs.insert(gv); + if (GEPOperator *gepo = dyn_cast(&U)) { + if (GlobalVariable *gv = + dyn_cast(gepo->getPointerOperand())) + GVs.insert(gv); + for (auto it = gepo->idx_begin(), et = gepo->idx_end(); it != et; + ++it) { + if (GlobalVariable *gv = dyn_cast(*it)) + GVs.insert(gv); + } + } + } + } + } + + // print type identities + printTypeIdentities(); + + // print global variables + if (!GVs.empty()) { + Out << '\n'; + for (auto GVit = GVs.begin(), et = GVs.end(); GVit != et; ++GVit) { + // Make backups of some properties. They may be modified for printing. 
+ GlobalValue::LinkageTypes SavedLinkage = (*GVit)->getLinkage(); + GlobalVariable::VisibilityTypes SavedVisibility = + (*GVit)->getVisibility(); + + // modify property if needed + if (!(*GVit)->hasAvailableExternallyLinkage() && + !((*GVit)->getName() == "llvm.global_ctors") && + (*GVit)->hasLocalLinkage()) { + (*GVit)->setLinkage(GlobalValue::ExternalLinkage); + (*GVit)->setVisibility(GlobalValue::HiddenVisibility); + } + + printGlobal(*GVit, true); + Out << '\n'; + + // restore backups + (*GVit)->setLinkage(SavedLinkage); + (*GVit)->setVisibility(SavedVisibility); + } + Out << '\n'; + } + + // print functions + for (auto FuncIt = Functions.begin(), et = Functions.end(); FuncIt != et; + ++FuncIt) { + Out << '\n'; + printFunction(*FuncIt, false, true); + } + + // Write attribute groups. + if (!Machine.as_empty()) { + Out << '\n'; + writeAllAttributeGroups(); + } + Out << '\n'; +} + /// printFunction - Print all aspects of a function. +void AssemblyWriter::printFunction(const Function *F, bool PrintCompleteIR, + bool PrintDeclarationOnly) { + if (PrintCompleteIR && !PrintDeclarationOnly) { + printRequisiteDeclarations(F); + } + if (AnnotationWriter && !PrintDeclarationOnly) + AnnotationWriter->emitFunctionAnnot(F, Out); + + if (F->isMaterializable() && !PrintDeclarationOnly) + Out << "; Materializable\n"; +#else void AssemblyWriter::printFunction(const Function *F) { if (AnnotationWriter) AnnotationWriter->emitFunctionAnnot(F, Out); if (F->isMaterializable()) Out << "; Materializable\n"; +#endif const AttributeList &Attrs = F->getAttributes(); if (Attrs.hasFnAttrs()) { @@ -3792,6 +3907,18 @@ void AssemblyWriter::printFunction(const Function *F) { Out << "; Function Attrs: " << AttrStr << '\n'; } +#if defined(ENABLE_AUTOTUNER) + if (!PrintDeclarationOnly) + Machine.incorporateFunction(F); + + if (F->isDeclaration() || PrintDeclarationOnly) { + Out << "declare"; + if (!PrintDeclarationOnly) { + SmallVector, 4> MDs; + F->getAllMetadata(MDs); + 
printMetadataAttachments(MDs, " "); + } +#else Machine.incorporateFunction(F); if (F->isDeclaration()) { @@ -3799,6 +3926,7 @@ void AssemblyWriter::printFunction(const Function *F) { SmallVector, 4> MDs; F->getAllMetadata(MDs); printMetadataAttachments(MDs, " "); +#endif Out << ' '; } else Out << "define "; @@ -3824,7 +3952,11 @@ void AssemblyWriter::printFunction(const Function *F) { Out << '('; // Loop over the arguments, printing them... +#if defined(ENABLE_AUTOTUNER) + if ((F->isDeclaration() && !IsForDebug) || PrintDeclarationOnly) { +#else if (F->isDeclaration() && !IsForDebug) { +#endif // We're only interested in the type here - don't print argument names. for (unsigned I = 0, E = FT->getNumParams(); I != E; ++I) { // Insert commas as we go... the first arg doesn't get a comma @@ -3895,7 +4027,11 @@ void AssemblyWriter::printFunction(const Function *F) { writeOperand(F->getPersonalityFn(), /*PrintType=*/true); } +#if defined(ENABLE_AUTOTUNER) + if (F->isDeclaration() || PrintDeclarationOnly) { +#else if (F->isDeclaration()) { +#endif Out << '\n'; } else { SmallVector, 4> MDs; @@ -3913,6 +4049,13 @@ void AssemblyWriter::printFunction(const Function *F) { Out << "}\n"; } +#if defined(ENABLE_AUTOTUNER) + // Output metadata + if (!Machine.mdn_empty() && PrintCompleteIR && !PrintDeclarationOnly) { + Out << '\n'; + writeAllMDNodes(); + } +#endif Machine.purgeFunction(); } @@ -4591,13 +4734,21 @@ void AssemblyWriter::printUseLists(const Function *F) { void Function::print(raw_ostream &ROS, AssemblyAnnotationWriter *AAW, bool ShouldPreserveUseListOrder, +#if defined(ENABLE_AUTOTUNER) + bool IsForDebug, bool PrintCompleteIR) const { +#else bool IsForDebug) const { +#endif SlotTracker SlotTable(this->getParent()); formatted_raw_ostream OS(ROS); AssemblyWriter W(OS, SlotTable, this->getParent(), AAW, IsForDebug, ShouldPreserveUseListOrder); +#if defined(ENABLE_AUTOTUNER) + W.printFunction(this, PrintCompleteIR); +#else W.printFunction(this); +#endif } void 
BasicBlock::print(raw_ostream &ROS, AssemblyAnnotationWriter *AAW, diff --git a/llvm/lib/IR/CMakeLists.txt b/llvm/lib/IR/CMakeLists.txt index 217fe703dd4eeff42f47bcde893285ced8f3a3c2..d44d1eea9f3e6974216597fcc05bd021d56eb697 100644 --- a/llvm/lib/IR/CMakeLists.txt +++ b/llvm/lib/IR/CMakeLists.txt @@ -78,6 +78,7 @@ add_llvm_component_library(LLVMCore intrinsics_gen LINK_COMPONENTS + AutoTuner BinaryFormat Demangle Remarks diff --git a/llvm/lib/IR/Function.cpp b/llvm/lib/IR/Function.cpp index 435800d9e5f9fe3cc68d70bc6c824d4b35493724..ec2620efac384989521d0c9332329a07c9ea7f7a 100644 --- a/llvm/lib/IR/Function.cpp +++ b/llvm/lib/IR/Function.cpp @@ -70,6 +70,10 @@ #include #include +#if defined(ENABLE_AUTOTUNER) +#include "llvm/IR/StructuralHash.h" +#endif + using namespace llvm; using ProfileCount = Function::ProfileCount; @@ -1977,6 +1981,36 @@ std::optional Function::getSectionPrefix() const { return std::nullopt; } +#if defined(ENABLE_AUTOTUNER) +uint64_t AutoTuningEnabledFunction::computeStructuralHash() { + return StructuralHash(*(this->Func)); +} + +void AutoTuningEnabledFunction::initCodeRegion() { + StringRef FuncName = Func->getName(); + StringRef EntryBBName; + autotuning::SourceLocation Loc; + + if (!Func->empty()) + EntryBBName = Func->front().getName(); + else + EntryBBName = StringRef("None"); + + DISubprogram *SubProgram = Func->getSubprogram(); + if (SubProgram) + // Set the column number to 0 because there is no information about + // column number for functions. 
+ Loc = {SubProgram->getFilename().str(), SubProgram->getLine(), 0}; + + autotuning::CodeRegion CR = + autotuning::CodeRegion(EntryBBName.data(), FuncName.data(), + autotuning::CodeRegionType::Function, Loc); + CR.setSize(Func->getInstructionCount()); + CR.setHotness(this->getHotness()); + this->setCodeRegion(CR); +} +#endif + bool Function::nullPointerIsDefined() const { return hasFnAttribute(Attribute::NullPointerIsValid); } diff --git a/llvm/lib/IR/Instructions.cpp b/llvm/lib/IR/Instructions.cpp index cb0ac0f8eae6ffe41ef38a3e7f914b4eaa27eedd..e614285df07ae9a2e5176aac53a29d31b25c3322 100644 --- a/llvm/lib/IR/Instructions.cpp +++ b/llvm/lib/IR/Instructions.cpp @@ -45,6 +45,9 @@ #include #include #include +#if defined(ENABLE_AUTOTUNER) +#include "llvm/IR/StructuralHash.h" +#endif using namespace llvm; @@ -259,6 +262,89 @@ void LandingPadInst::addClause(Constant *Val) { getOperandList()[OpNo] = Val; } +#if defined(ENABLE_AUTOTUNER) +uint64_t AutoTuningEnabledSwitchInst::computeStructuralHash() { + return StructuralHash(*(this->SI)); +} + +void AutoTuningEnabledSwitchInst::initCodeRegion() { + std::string SwitchName; + if (this->SI->hasName()) { + SwitchName = this->SI->getName().str(); + } else { + std::string Str; + llvm::raw_string_ostream RSO(Str); + this->SI->getCondition()->printAsOperand(RSO); + SwitchName = RSO.str(); + } + + autotuning::CodeRegion CR = autotuning::CodeRegion( + SwitchName, this->SI->getFunction()->getName().str(), + autotuning::CodeRegionType::Switch, this->SI->getDebugLoc()); + + unsigned TotalNumInsts = 0; + for (auto Case : SI->cases()) { + const BasicBlock *BB = Case.getCaseSuccessor(); + unsigned NumInsts = std::distance(BB->instructionsWithoutDebug().begin(), + BB->instructionsWithoutDebug().end()); + TotalNumInsts += NumInsts; + } + + CR.setSize(TotalNumInsts); + // Compute hotness. 
+ autotuning::HotnessType Hotness = + this->SI->getFunction()->ATEFunction.getHotness(); + CR.setHotness(Hotness); + + this->setCodeRegion(CR); +} + +uint64_t AutoTuningEnabledCallSite::computeStructuralHash() { + return StructuralHash(*(this->CB)); +} + +void AutoTuningEnabledCallSite::initCodeRegion() { + // Use Caller's name as FuncName and Callee's name as Name of a CodeRegion. + Function *Caller = this->CB->getCaller(); + Function *Callee = this->CB->getCalledFunction(); + if (Caller == nullptr || Callee == nullptr) { + this->setCodeRegion(autotuning::CodeRegion::getInvalidInstance()); + return; + } + + autotuning::SourceLocation SrcLoc; + if (this->CB->getDebugLoc()) { + unsigned int SourceLine = this->CB->getDebugLoc()->getLine(); + // Get modified source line number for current callsite if there is another + // call instruction (to same callee) which has same source line number + // happened due to inlining. + std::optional LineNum = autotuning::Engine.getCallSiteLoc(CB); + if (LineNum) + SourceLine = *LineNum; + SrcLoc = autotuning::SourceLocation{ + this->CB->getDebugLoc()->getFilename().str(), SourceLine, + this->CB->getDebugLoc()->getColumn()}; + } + + // We are using DebugLoc to distinguish between multiple calls to the same + // callee in a function. It may be possible that these multiple calls have + // same DebugLoc either 1) due to inlining of multiple calls (same callee) + // and callee having more calls, or 2) cloned calls added by previous + // optimizations. We are using 'callee name + its parent (basic block) name' + // to solve these problems. Additionally we are using modified line number + // for the issue # 1; this will handle the cases where the multiple calls are + // in the same basic block. 
+ autotuning::CodeRegion CR = autotuning::CodeRegion( + Callee->getName().str() + "-" + this->CB->getParent()->getName().str(), + Caller->getName().data(), autotuning::CodeRegionType::CallSite, SrcLoc, + autotuning::DynamicOptions{{"ForceInline", {0, 1}}}); + + CR.setSize(Callee->getInstructionCount()); + CR.setHotness(Caller->ATEFunction.getHotness()); + this->setCodeRegion(CR); +} +#endif + //===----------------------------------------------------------------------===// // CallBase Implementation //===----------------------------------------------------------------------===// diff --git a/llvm/lib/IR/StructuralHash.cpp b/llvm/lib/IR/StructuralHash.cpp index 6ea108d831a165584db7cf1bf29794595a333531..1583e1c82b3e540020763762d279c7cc18c82ee6 100644 --- a/llvm/lib/IR/StructuralHash.cpp +++ b/llvm/lib/IR/StructuralHash.cpp @@ -10,9 +10,23 @@ #include "llvm/IR/Function.h" #include "llvm/IR/GlobalVariable.h" #include "llvm/IR/Module.h" +#if defined(ENABLE_AUTOTUNER) +#include "llvm/CodeGen/MachineBasicBlock.h" +#include "llvm/IR/InstrTypes.h" +#include "llvm/IR/Instructions.h" +#include "llvm/Support/CommandLine.h" +#endif using namespace llvm; +#if defined(ENABLE_AUTOTUNER) +// AutoTuner Flag to use callsite Debug Location for hash calculation. +static cl::opt HashCallSite( + "hash-prior-to-callsite", cl::init(true), cl::Hidden, + cl::desc("Use function IR prior to a call site to compute the hashcode for" + " the call site")); +#endif + namespace { // Basic hashing mechanism to detect structural change to the IR, used to verify @@ -21,16 +35,81 @@ namespace { class StructuralHashImpl { hash_code Hash; +#if defined(ENABLE_AUTOTUNER) + const uint64_t BLOCK_HEADER_HASH = 45798; +#endif template void hash(const T &V) { Hash = hash_combine(Hash, V); } public: StructuralHashImpl() : Hash(4) {} +#if defined(ENABLE_AUTOTUNER) + void update(const MachineBasicBlock &MBB) { + // Update the structural hash when we encounter a new basic block. 
+ // Prevents CodeRegions with different structures, but many empty + // BasicBlocks to have the same structural hash. + if (const BasicBlock *Block = MBB.getBasicBlock()) { + hash(BLOCK_HEADER_HASH); // Block header + for (auto &Inst : *Block) + hash(Inst.getOpcode()); + } + } + + void update(const std::vector BBs) { + // Update the structural hash when we encounter a new basic block. + // Prevents CodeRegions with different structures, but many empty + // BasicBlocks to have the same structural hash. + for (BasicBlock *BB : BBs) { + if (BB == nullptr) + continue; + + hash(BLOCK_HEADER_HASH); // Block header + for (auto &Inst : *BB) + hash(Inst.getOpcode()); + } + } + + void update(const llvm::CallBase &CB) { + StringRef Name = ""; + if (HashCallSite) { + update(*CB.getCaller(), std::addressof(CB)); + } else { + const Function &F = *CB.getCaller(); + Name = F.getName(); + std::string FileName = Name.str(); + for (uint64_t Idx = 0; Idx < Name.size(); Idx = Idx + sizeof(uint64_t)) { + uint64_t Value = 0; + FileName.copy((char *)&Value, sizeof(uint64_t), Idx); + hash(Value); + } + } + + update(*CB.getCalledFunction()); + } + + void update(const SwitchInst &SI) { + hash(SI.getNumCases()); + for (auto Case : SI.cases()) { + hash(BLOCK_HEADER_HASH); + const BasicBlock *BB = Case.getCaseSuccessor(); + for (auto &Inst : *BB) + hash(Inst.getOpcode()); + } + } + + void update(const Function &F, const CallBase *TargetCB = nullptr) { + if (F.isDeclaration()) + return; + + const Instruction *I = + TargetCB ? (dyn_cast(TargetCB)) : nullptr; +#else void update(const Function &F) { // Declarations don't affect analyses. 
if (F.isDeclaration()) return; +#endif hash(12345); // Function header @@ -44,9 +123,18 @@ public: VisitedBBs.insert(BBs[0]); while (!BBs.empty()) { const BasicBlock *BB = BBs.pop_back_val(); +#if defined(ENABLE_AUTOTUNER) + hash(BLOCK_HEADER_HASH); // Block header + for (auto &Inst : *BB) { + hash(Inst.getOpcode()); + if (I && Inst.isIdenticalTo(I)) + return; + } +#else hash(45798); // Block header for (auto &Inst : *BB) hash(Inst.getOpcode()); +#endif const Instruction *Term = BB->getTerminator(); for (unsigned i = 0, e = Term->getNumSuccessors(); i != e; ++i) { @@ -79,6 +167,32 @@ public: } // namespace +#if defined(ENABLE_AUTOTUNER) +uint64_t llvm::StructuralHash(const MachineBasicBlock &MBB) { + StructuralHashImpl H; + H.update(MBB); + return H.getHash(); +} + +uint64_t llvm::StructuralHash(const std::vector BBs) { + StructuralHashImpl H; + H.update(BBs); + return H.getHash(); +} + +uint64_t llvm::StructuralHash(const CallBase &CB) { + StructuralHashImpl H; + H.update(CB); + return H.getHash(); +} + +uint64_t llvm::StructuralHash(const SwitchInst &SI) { + StructuralHashImpl H; + H.update(SI); + return H.getHash(); +} +#endif + uint64_t llvm::StructuralHash(const Function &F) { StructuralHashImpl H; H.update(F); diff --git a/llvm/lib/Passes/PassBuilder.cpp b/llvm/lib/Passes/PassBuilder.cpp index d0cbbcc0e310b9821a7ac166de14f524f90cf0dd..a3ccbc6d258f464c2531c53d904fae9335853e11 100644 --- a/llvm/lib/Passes/PassBuilder.cpp +++ b/llvm/lib/Passes/PassBuilder.cpp @@ -262,6 +262,11 @@ #include "llvm/Transforms/Vectorize/VectorCombine.h" #include +#if defined(ENABLE_AUTOTUNER) +#include "llvm/Analysis/AutotuningDump.h" +#include "llvm/Transforms/Scalar/AutoTuningCompile.h" +#endif + using namespace llvm; static const Regex DefaultAliasRegex( diff --git a/llvm/lib/Passes/PassBuilderPipelines.cpp b/llvm/lib/Passes/PassBuilderPipelines.cpp index 660cb2e974d781608cb683dfbb05b61251db4e32..8009e011833cc41a3418e75a59e20c4dd110336c 100644 --- 
a/llvm/lib/Passes/PassBuilderPipelines.cpp +++ b/llvm/lib/Passes/PassBuilderPipelines.cpp @@ -133,6 +133,11 @@ #include "llvm/Transforms/Vectorize/SLPVectorizer.h" #include "llvm/Transforms/Vectorize/VectorCombine.h" +#if defined(ENABLE_AUTOTUNER) +#include "llvm/AutoTuner/AutoTuning.h" +#include "llvm/Transforms/Scalar/AutoTuningCompile.h" +#endif + using namespace llvm; static cl::opt UseInlineAdvisor( @@ -289,6 +294,10 @@ PipelineTuningOptions::PipelineTuningOptions() { EagerlyInvalidateAnalyses = EnableEagerlyInvalidateAnalyses; } +#if defined(ENABLE_AUTOTUNER) +extern cl::opt AutoTuningCompileMode; +#endif + namespace llvm { extern cl::opt MaxDevirtIterations; extern cl::opt EnableKnowledgeRetention; @@ -452,9 +461,17 @@ PassBuilder::buildO1FunctionSimplificationPipeline(OptimizationLevel Level, // attention to it. if (Phase != ThinOrFullLTOPhase::ThinLTOPreLink || !PGOOpt || PGOOpt->Action != PGOOptions::SampleUse) +#if defined(ENABLE_AUTOTUNER) + { + if (AutoTuningCompileMode) + LPM2.addPass(AutoTuningCompileLoopPass(autotuning::CompileOptionUnroll)); +#endif LPM2.addPass(LoopFullUnrollPass(Level.getSpeedupLevel(), /* OnlyWhenForced= */ !PTO.LoopUnrolling, PTO.ForgetAllSCEVInLoopUnroll)); +#if defined(ENABLE_AUTOTUNER) + } +#endif invokeLoopOptimizerEndEPCallbacks(LPM2, Level); @@ -631,9 +648,17 @@ PassBuilder::buildFunctionSimplificationPipeline(OptimizationLevel Level, // attention to it. 
if (Phase != ThinOrFullLTOPhase::ThinLTOPreLink || !PGOOpt || PGOOpt->Action != PGOOptions::SampleUse) +#if defined(ENABLE_AUTOTUNER) + { + if (AutoTuningCompileMode) + LPM2.addPass(AutoTuningCompileLoopPass(autotuning::CompileOptionUnroll)); +#endif LPM2.addPass(LoopFullUnrollPass(Level.getSpeedupLevel(), /* OnlyWhenForced= */ !PTO.LoopUnrolling, PTO.ForgetAllSCEVInLoopUnroll)); +#if defined(ENABLE_AUTOTUNER) + } +#endif invokeLoopOptimizerEndEPCallbacks(LPM2, Level); @@ -1110,6 +1135,11 @@ PassBuilder::buildModuleSimplificationPipeline(OptimizationLevel Level, if (EnableSyntheticCounts && !PGOOpt) MPM.addPass(SyntheticCountsPropagation()); +#if defined(ENABLE_AUTOTUNER) + if (AutoTuningCompileMode) + MPM.addPass(AutoTuningCompileModulePass(autotuning::CompileOptionInline)); +#endif + if (EnableModuleInliner) MPM.addPass(buildModuleInlinerPipeline(Level, Phase)); else @@ -1131,6 +1161,12 @@ PassBuilder::buildModuleSimplificationPipeline(OptimizationLevel Level, /// TODO: Should LTO cause any differences to this set of passes? void PassBuilder::addVectorPasses(OptimizationLevel Level, FunctionPassManager &FPM, bool IsFullLTO) { +#if defined(ENABLE_AUTOTUNER) + if (AutoTuningCompileMode && !IsFullLTO) + FPM.addPass( + AutoTuningCompileFunctionPass(autotuning::CompileOptionVectorize)); +#endif + FPM.addPass(LoopVectorizePass( LoopVectorizeOptions(!PTO.LoopInterleaving, !PTO.LoopVectorization))); @@ -1444,6 +1480,10 @@ PassBuilder::buildPerModuleDefaultPipeline(OptimizationLevel Level, return buildO0DefaultPipeline(Level, LTOPreLink); ModulePassManager MPM; +#if defined(ENABLE_AUTOTUNER) + if (AutoTuningCompileMode) + MPM.addPass(AutoTuningCompileModulePass(autotuning::CompileOptionStart)); +#endif // Convert @llvm.global.annotations to !annotation metadata. 
MPM.addPass(Annotation2MetadataPass()); @@ -1475,6 +1515,12 @@ PassBuilder::buildPerModuleDefaultPipeline(OptimizationLevel Level, if (LTOPreLink) addRequiredLTOPreLinkPasses(MPM); + +#if defined(ENABLE_AUTOTUNER) + if (AutoTuningCompileMode) + MPM.addPass(AutoTuningCompileModulePass(autotuning::CompileOptionEnd)); +#endif + return MPM; } diff --git a/llvm/lib/Passes/PassRegistry.def b/llvm/lib/Passes/PassRegistry.def index e10dc995c493056d7bc148c59ffc69b49c868117..45a539f14b93e210be4fa34e94107a86b80e0ba2 100644 --- a/llvm/lib/Passes/PassRegistry.def +++ b/llvm/lib/Passes/PassRegistry.def @@ -29,6 +29,10 @@ MODULE_ANALYSIS("pass-instrumentation", PassInstrumentationAnalysis(PIC)) MODULE_ANALYSIS("inline-advisor", InlineAdvisorAnalysis()) MODULE_ANALYSIS("ir-similarity", IRSimilarityAnalysis()) +#if defined(ENABLE_AUTOTUNER) +MODULE_ANALYSIS("autotuning-dump", AutotuningDumpAnalysis()) +#endif + #ifndef MODULE_ALIAS_ANALYSIS #define MODULE_ALIAS_ANALYSIS(NAME, CREATE_PASS) \ MODULE_ANALYSIS(NAME, CREATE_PASS) @@ -127,6 +131,9 @@ MODULE_PASS("sanmd-module", SanitizerBinaryMetadataPass()) MODULE_PASS("memprof-module", ModuleMemProfilerPass()) MODULE_PASS("poison-checking", PoisonCheckingPass()) MODULE_PASS("pseudo-probe-update", PseudoProbeUpdatePass()) +#if defined(ENABLE_AUTOTUNER) +MODULE_PASS("autotuning-compile-module", AutoTuningCompileModulePass()) +#endif #undef MODULE_PASS #ifndef MODULE_PASS_WITH_PARAMS @@ -430,6 +437,9 @@ FUNCTION_PASS("transform-warning", WarnMissedTransformationsPass()) FUNCTION_PASS("tsan", ThreadSanitizerPass()) FUNCTION_PASS("memprof", MemProfilerPass()) FUNCTION_PASS("declare-to-assign", llvm::AssignmentTrackingPass()) +#if defined(ENABLE_AUTOTUNER) +FUNCTION_PASS("autotuning-compile-function", AutoTuningCompileFunctionPass()) +#endif #undef FUNCTION_PASS #ifndef FUNCTION_PASS_WITH_PARAMS @@ -614,6 +624,9 @@ LOOP_PASS("guard-widening", GuardWideningPass()) LOOP_PASS("loop-bound-split", LoopBoundSplitPass()) LOOP_PASS("loop-reroll", 
LoopRerollPass()) LOOP_PASS("loop-versioning-licm", LoopVersioningLICMPass()) +#if defined(ENABLE_AUTOTUNER) +LOOP_PASS("autotuning-compile-loop", AutoTuningCompileLoopPass()) +#endif #undef LOOP_PASS #ifndef LOOP_PASS_WITH_PARAMS diff --git a/llvm/lib/Passes/StandardInstrumentations.cpp b/llvm/lib/Passes/StandardInstrumentations.cpp index 7eef511928ec5ff70c9fe3bcfd66b3173ea6570b..8653027ceed2d5b5018dfe9931f0aecb0925a947 100644 --- a/llvm/lib/Passes/StandardInstrumentations.cpp +++ b/llvm/lib/Passes/StandardInstrumentations.cpp @@ -41,6 +41,10 @@ #include #include #include +#if defined(ENABLE_AUTOTUNER) +#include "llvm/AutoTuner/AutoTuning.h" +#include "llvm/Transforms/Scalar/AutoTuningCompile.h" +#endif using namespace llvm; @@ -107,6 +111,10 @@ static cl::opt PrintOnCrash( cl::desc("Print the last form of the IR before crash (use -print-on-crash-path to dump to a file)"), cl::Hidden); +#if defined(ENABLE_AUTOTUNER) +extern cl::opt AutoTuningCompileMode; +#endif + static cl::opt OptBisectPrintIRPath( "opt-bisect-print-ir-path", cl::desc("Print IR to path when opt-bisect-limit is reached"), cl::Hidden); @@ -874,6 +882,21 @@ bool OptPassGateInstrumentation::shouldRun(StringRef PassName, Any IR) { void OptPassGateInstrumentation::registerCallbacks( PassInstrumentationCallbacks &PIC) { +#if defined(ENABLE_AUTOTUNER) + // Using AutoTuner OptBisect to change the behavior of compilation pipeline. + // Flag 'opt-bisect-limit' will be preferred if both 'opt-bisect-limit' and + // incremental compilation flags are used. 
+ if (autotuning::Engine.isParseInput() && AutoTuningCompileMode) { + if (!getAutoTuningOptPassGate().isEnabled()) + return; + + PIC.registerShouldRunOptionalPassCallback([](StringRef PassID, Any IR) { + return isIgnored(PassID) || + getAutoTuningOptPassGate().checkPass(PassID, getIRName(IR)); + }); + return; + } +#endif OptPassGate &PassGate = Context.getOptPassGate(); if (!PassGate.isEnabled()) return; diff --git a/llvm/lib/Remarks/BitstreamRemarkSerializer.cpp b/llvm/lib/Remarks/BitstreamRemarkSerializer.cpp index b2627196bce62d770888c6c158cadf880ab57c37..b1dfa9d0f2cfe51a1405d4a6daec143e70988749 100644 --- a/llvm/lib/Remarks/BitstreamRemarkSerializer.cpp +++ b/llvm/lib/Remarks/BitstreamRemarkSerializer.cpp @@ -277,6 +277,14 @@ void BitstreamRemarkSerializerHelper::emitRemarkBlock(const Remark &Remark, R.push_back(StrTab.add(Remark.RemarkName).first); R.push_back(StrTab.add(Remark.PassName).first); R.push_back(StrTab.add(Remark.FunctionName).first); +#if defined(ENABLE_AUTOTUNER) + if (Remark.CodeRegionType) + R.push_back(StrTab.add(*Remark.CodeRegionType).first); + if (std::optional hash = Remark.CodeRegionHash) + R.push_back(*hash); + if (std::optional Invocation = Remark.Invocation) + R.push_back(*Invocation); +#endif Bitstream.EmitRecordWithAbbrev(RecordRemarkHeaderAbbrevID, R); if (const std::optional &Loc = Remark.Loc) { diff --git a/llvm/lib/Remarks/RemarkStreamer.cpp b/llvm/lib/Remarks/RemarkStreamer.cpp index 9f4676ce37ab9970a9644b6c0d2a445cfce2f5e0..d1faf4f1553a402cd93c2915ef5acce39a35678a 100644 --- a/llvm/lib/Remarks/RemarkStreamer.cpp +++ b/llvm/lib/Remarks/RemarkStreamer.cpp @@ -14,6 +14,10 @@ #include "llvm/Support/CommandLine.h" #include +#if defined(ENABLE_AUTOTUNER) +#include "llvm/IR/DebugInfoMetadata.h" +#endif + using namespace llvm; using namespace llvm::remarks; diff --git a/llvm/lib/Remarks/YAMLRemarkParser.cpp b/llvm/lib/Remarks/YAMLRemarkParser.cpp index f5123b0f64ce11378e7868cd24cc8598d3c91272..baa393c6a619615916dc11dae04cfb4427c8be62 
100644 --- a/llvm/lib/Remarks/YAMLRemarkParser.cpp +++ b/llvm/lib/Remarks/YAMLRemarkParser.cpp @@ -17,10 +17,23 @@ #include "llvm/Support/Endian.h" #include "llvm/Support/Path.h" #include +#if defined(ENABLE_AUTOTUNER) +#include "llvm/Support/CommandLine.h" +#endif using namespace llvm; using namespace llvm::remarks; +#if defined(ENABLE_AUTOTUNER) +// Creating code regions without meta data (e.g. debug Location, Function Name, +// etc.). +// This flag is added here instead of 'lib/AutoTuner/AutoTuning.cpp' to avoid +// making LLVMRemarks dependent on LLVMCore. +cl::opt OmitAutotuningMetadata( + "auto-tuning-omit-metadata", cl::Hidden, cl::init(false), + cl::desc("Include only code region hashes and types in opportunity files")); +#endif + char YAMLParseError::ID = 0; static void handleDiagnostic(const SMDiagnostic &Diag, void *Ctx) { @@ -235,6 +248,23 @@ YAMLRemarkParser::parseRemark(yaml::Document &RemarkEntry) { TheRemark.FunctionName = *MaybeStr; else return MaybeStr.takeError(); +#if defined(ENABLE_AUTOTUNER) + } else if (KeyName == "CodeRegionType") { + if (Expected MaybeStr = parseStr(RemarkField)) + TheRemark.CodeRegionType = *MaybeStr; + else + return MaybeStr.takeError(); + } else if (KeyName == "CodeRegionHash") { + if (Expected MaybeULL = parseUnsignedLL(RemarkField)) + TheRemark.CodeRegionHash = *MaybeULL; + else + return MaybeULL.takeError(); + } else if (KeyName == "Invocation") { + if (Expected MaybeULL = parseUnsignedLL(RemarkField)) + TheRemark.Invocation = *MaybeULL; + else + return MaybeULL.takeError(); +#endif } else if (KeyName == "Hotness") { if (Expected MaybeU = parseUnsigned(RemarkField)) TheRemark.Hotness = *MaybeU; @@ -261,11 +291,35 @@ YAMLRemarkParser::parseRemark(yaml::Document &RemarkEntry) { } } +#if defined(ENABLE_AUTOTUNER) + // Check if any of the mandatory fields are missing. + if (TheRemark.RemarkType == Type::AutoTuning) { + // We expect type, and pass to be present at least. 
+ if (!TheRemark.CodeRegionType || TheRemark.PassName.empty()) + return error("CodeRegionHash, CodeRegionType, or Pass missing.", + *RemarkEntry.getRoot()); + + // Sanity check for the correct command line option. + if (!OmitAutotuningMetadata && TheRemark.RemarkName.empty()) + return error("Remark Name expected; enable -auto-tuning-omit-metadata.", + *RemarkEntry.getRoot()); + + if (!OmitAutotuningMetadata && TheRemark.FunctionName.empty()) + return error( + "Remark Function Name expected; enable -auto-tuning-omit-metadata.", + *RemarkEntry.getRoot()); + } else if (TheRemark.RemarkType == Type::Unknown || + TheRemark.PassName.empty() || TheRemark.RemarkName.empty() || + TheRemark.FunctionName.empty()) + return error("Type, Pass, Name or Function missing.", + *RemarkEntry.getRoot()); +#else // Check if any of the mandatory fields are missing. if (TheRemark.RemarkType == Type::Unknown || TheRemark.PassName.empty() || TheRemark.RemarkName.empty() || TheRemark.FunctionName.empty()) return error("Type, Pass, Name or Function missing.", *RemarkEntry.getRoot()); +#endif return std::move(Result); } @@ -277,6 +331,9 @@ Expected YAMLRemarkParser::parseType(yaml::MappingNode &Node) { .Case("!Analysis", remarks::Type::Analysis) .Case("!AnalysisFPCommute", remarks::Type::AnalysisFPCommute) .Case("!AnalysisAliasing", remarks::Type::AnalysisAliasing) +#if defined(ENABLE_AUTOTUNER) + .Case("!AutoTuning", remarks::Type::AutoTuning) +#endif .Case("!Failure", remarks::Type::Failure) .Default(remarks::Type::Unknown); if (Type == remarks::Type::Unknown) @@ -313,6 +370,31 @@ Expected YAMLRemarkParser::parseStr(yaml::KeyValueNode &Node) { return Result; } +#if defined(ENABLE_AUTOTUNER) +Expected> +YAMLRemarkParser::parseStrVector(yaml::KeyValueNode &Node) { + std::vector Result; + auto *SequenceNode = dyn_cast(Node.getValue()); + if (!SequenceNode) + return error("expected a value of sequence type.", Node); + + for (yaml::Node &Element : *SequenceNode) { + auto *ScalarNode =
dyn_cast(&Element); + if (!ScalarNode) + return error("expected a value of scalar type.", Element); + else { + StringRef Str = ScalarNode->getRawValue(); + if (!Str.empty() && Str.front() == '\'') + Str = Str.drop_front(); + if (!Str.empty() && Str.back() == '\'') + Str = Str.drop_back(); + Result.push_back(Str); + } + } + return Result; +} +#endif + Expected YAMLRemarkParser::parseUnsigned(yaml::KeyValueNode &Node) { SmallVector Tmp; auto *Value = dyn_cast(Node.getValue()); @@ -324,6 +406,19 @@ Expected YAMLRemarkParser::parseUnsigned(yaml::KeyValueNode &Node) { return UnsignedValue; } +#if defined(ENABLE_AUTOTUNER) +Expected YAMLRemarkParser::parseUnsignedLL(yaml::KeyValueNode &Node) { + SmallVector Tmp; + if (auto *Value = dyn_cast(Node.getValue())) { + uint64_t UnsignedValue = 0; + if (Value->getValue(Tmp).getAsInteger(10, UnsignedValue)) + return error("expected a value of integer type.", *Value); + return UnsignedValue; + } + return error("expected a value of scalar type.", Node); +} +#endif + Expected YAMLRemarkParser::parseDebugLoc(yaml::KeyValueNode &Node) { auto *DebugLoc = dyn_cast(Node.getValue()); @@ -374,6 +469,9 @@ Expected YAMLRemarkParser::parseArg(yaml::Node &Node) { std::optional KeyStr; std::optional ValueStr; +#if defined(ENABLE_AUTOTUNER) + std::optional> ValueStrVector; +#endif std::optional Loc; for (yaml::KeyValueNode &ArgEntry : *ArgMap) { @@ -400,11 +498,27 @@ Expected YAMLRemarkParser::parseArg(yaml::Node &Node) { if (ValueStr) return error("only one string entry is allowed per argument.", ArgEntry); +#if defined(ENABLE_AUTOTUNER) + // Try to parse the value to a string vector. + if (Expected> MaybeStrVector = + parseStrVector(ArgEntry)) { + ValueStrVector = *MaybeStrVector; + ValueStr = ""; + } else { + consumeError(MaybeStrVector.takeError()); + // Try to parse the value. + if (Expected MaybeStr = parseStr(ArgEntry)) + ValueStr = *MaybeStr; + else + return MaybeStr.takeError(); + } +#else // Try to parse the value.
if (Expected MaybeStr = parseStr(ArgEntry)) ValueStr = *MaybeStr; else return MaybeStr.takeError(); +#endif // Keep the key from the string. KeyStr = KeyName; @@ -412,10 +526,18 @@ Expected YAMLRemarkParser::parseArg(yaml::Node &Node) { if (!KeyStr) return error("argument key is missing.", *ArgMap); +#if defined(ENABLE_AUTOTUNER) + if (!ValueStr && !ValueStrVector) +#else if (!ValueStr) +#endif return error("argument value is missing.", *ArgMap); +#if defined(ENABLE_AUTOTUNER) + return Argument{*KeyStr, *ValueStr, ValueStrVector, Loc}; +#else return Argument{*KeyStr, *ValueStr, Loc}; +#endif } Expected> YAMLRemarkParser::next() { diff --git a/llvm/lib/Remarks/YAMLRemarkParser.h b/llvm/lib/Remarks/YAMLRemarkParser.h index 8ef72e16be74e3599cbfe1ae9affa427249cd024..141f10dd3900d1004c56df4ed897e5d555c69ba1 100644 --- a/llvm/lib/Remarks/YAMLRemarkParser.h +++ b/llvm/lib/Remarks/YAMLRemarkParser.h @@ -91,6 +91,12 @@ protected: Expected parseDebugLoc(yaml::KeyValueNode &Node); /// Parse an argument. Expected parseArg(yaml::Node &Node); +#if defined(ENABLE_AUTOTUNER) + /// parse a vector of strings. + Expected> parseStrVector(yaml::KeyValueNode &Node); + /// Parse one value to an unsigned long long. + Expected parseUnsignedLL(yaml::KeyValueNode &Node); +#endif }; /// YAML with a string table to Remark parser. 
diff --git a/llvm/lib/Remarks/YAMLRemarkSerializer.cpp b/llvm/lib/Remarks/YAMLRemarkSerializer.cpp index 68285c3dde1bf9c089c5412ebbfe5e95b06d0b32..1bc0f23f92218026005e664de3650237ef82ccde 100644 --- a/llvm/lib/Remarks/YAMLRemarkSerializer.cpp +++ b/llvm/lib/Remarks/YAMLRemarkSerializer.cpp @@ -15,10 +15,45 @@ #include "llvm/Remarks/Remark.h" #include "llvm/Support/FileSystem.h" #include +#if defined(ENABLE_AUTOTUNER) +#include "llvm/Support/CommandLine.h" +#endif using namespace llvm; using namespace llvm::remarks; +#if defined(ENABLE_AUTOTUNER) +extern cl::opt OmitAutotuningMetadata; + +// Use the same keys whether we use a string table or not (respectively, T is an +// unsigned or a StringRef). +template +static void mapRemarkHeader( + yaml::IO &io, T PassName, T RemarkName, std::optional RL, + T FunctionName, std::optional CodeRegionType, + std::optional CodeRegionHash, + std::optional Invocation, + std::optional> BaselineConfig, + std::optional>> + AutoTunerOptions, + std::optional Hotness, ArrayRef Args) { + io.mapRequired("Pass", PassName); + if (!OmitAutotuningMetadata) { + io.mapRequired("Name", RemarkName); + io.mapOptional("DebugLoc", RL); + io.mapRequired("Function", FunctionName); + } + io.mapOptional("CodeRegionType", CodeRegionType); + io.mapOptional("CodeRegionHash", CodeRegionHash); + io.mapOptional("DynamicConfigs", AutoTunerOptions); + io.mapOptional("BaselineConfig", BaselineConfig); + io.mapOptional("Invocation", Invocation); + if (!OmitAutotuningMetadata) { + io.mapOptional("Hotness", Hotness); + io.mapOptional("Args", Args); + } +} +#else // Use the same keys whether we use a string table or not (respectively, T is an // unsigned or a StringRef). 
template @@ -33,6 +68,7 @@ static void mapRemarkHeader(yaml::IO &io, T PassName, T RemarkName, io.mapOptional("Hotness", Hotness); io.mapOptional("Args", Args); } +#endif namespace llvm { namespace yaml { @@ -53,6 +89,10 @@ template <> struct MappingTraits { else if (io.mapTag("!AnalysisAliasing", (Remark->RemarkType == Type::AnalysisAliasing))) ; +#if defined(ENABLE_AUTOTUNER) + else if (io.mapTag("!AutoTuning", (Remark->RemarkType == Type::AutoTuning))) + ; +#endif else if (io.mapTag("!Failure", (Remark->RemarkType == Type::Failure))) ; else @@ -66,14 +106,58 @@ template <> struct MappingTraits { unsigned NameID = StrTab.add(Remark->RemarkName).first; unsigned FunctionID = StrTab.add(Remark->FunctionName).first; mapRemarkHeader(io, PassID, NameID, Remark->Loc, FunctionID, +#if defined(ENABLE_AUTOTUNER) + Remark->CodeRegionType, Remark->CodeRegionHash, + Remark->Invocation, Remark->BaselineConfig, + Remark->AutoTunerOptions, Remark->Hotness, Remark->Args); + +#else Remark->Hotness, Remark->Args); +#endif } else { mapRemarkHeader(io, Remark->PassName, Remark->RemarkName, Remark->Loc, +#if defined(ENABLE_AUTOTUNER) + Remark->FunctionName, Remark->CodeRegionType, + Remark->CodeRegionHash, Remark->Invocation, + Remark->BaselineConfig, Remark->AutoTunerOptions, + Remark->Hotness, Remark->Args); +#else Remark->FunctionName, Remark->Hotness, Remark->Args); +#endif } } }; +#if defined(ENABLE_AUTOTUNER) +// YAML I/O to support dumping 'Values: { key: [...], ... }' in opportunity +// files. 
+template <> +struct MappingTraits>> { + static void mapping(IO &io, + std::map> &OM) { + assert(io.outputting() && "input not yet implemented"); + + // Print as an abbreviated dictionary + llvm::yaml::StdMapStringCustomMappingTraitsImpl< + std::vector>::output(io, OM); + } + // This sets the beginFlowMapping and endFlowMapping + static const bool flow = true; +}; + +template <> struct MappingTraits> { + static void mapping(IO &io, std::map &OM) { + assert(io.outputting() && "input not yet implemented"); + + // Print as an abbreviated dictionary + llvm::yaml::StdMapStringCustomMappingTraitsImpl::output(io, + OM); + } + // This sets the beginFlowMapping and endFlowMapping + static const bool flow = true; +}; +#endif + template <> struct MappingTraits { static void mapping(IO &io, RemarkLocation &RL) { assert(io.outputting() && "input not yet implemented"); diff --git a/llvm/lib/Support/CommandLine.cpp b/llvm/lib/Support/CommandLine.cpp index d3efb8b67be5c67d47a09ff8690b7351bc0d936b..b66415c0e9a91f88edce1960856463b173b4d426 100644 --- a/llvm/lib/Support/CommandLine.cpp +++ b/llvm/lib/Support/CommandLine.cpp @@ -127,6 +127,9 @@ static inline bool isPrefixedOrGrouping(const Option *O) { O->getFormattingFlag() == cl::AlwaysPrefix; } +#if defined(ENABLE_AUTOTUNER) +#include +#endif namespace { @@ -1470,6 +1473,44 @@ bool cl::ParseCommandLineOptions(int argc, const char *const *argv, Errs, LongOptionsUseDoubleDash); } +#if defined(ENABLE_AUTOTUNER) +bool cl::ParseAutoTunerOptions( + std::unordered_map LLVMParams, + std::unordered_map ProgramParams, + StringRef Overview, raw_ostream *Errs, const char *EnvVar, + bool LongOptionsUseDoubleDash) { + SmallVector NewArgv; + BumpPtrAllocator A; + StringSaver Saver(A); + // GlobalParser requires arguments similar to C style command line options + // (int argc, char * argv[]) where argv[0] refers to the program name. + // We are using a fake program name here which is consistent with LLVM. 
+ NewArgv.push_back("AutoTuner (LLVM option parsing)"); + + for (const auto &I : LLVMParams) { + std::string NewOption = I.first + "=" + I.second; + NewArgv.push_back(Saver.save(NewOption).data()); + } + + for (const auto &I : ProgramParams) { + std::string NewOption = I.first + "=" + I.second; + NewArgv.push_back(Saver.save(NewOption).data()); + } + + // Parse options from environment variable. + if (EnvVar) { + if (std::optional EnvValue = + sys::Process::GetEnv(StringRef(EnvVar))) + TokenizeGNUCommandLine(*EnvValue, Saver, NewArgv); + } + + int NewArgc = static_cast(NewArgv.size()); + // Parse all options. + return GlobalParser->ParseCommandLineOptions(NewArgc, &NewArgv[0], Overview, + Errs, LongOptionsUseDoubleDash); +} +#endif + /// Reset all options at least once, so that we can parse different options. void CommandLineParser::ResetAllOptionOccurrences() { // Reset all option values to look like they have never been seen before. diff --git a/llvm/lib/Transforms/IPO/CMakeLists.txt b/llvm/lib/Transforms/IPO/CMakeLists.txt index 034f1587ae8df44bdb5b9f311197dcfdbdc1d938..3507d357a4c64cacd0975f835a2d4b521de8aaff 100644 --- a/llvm/lib/Transforms/IPO/CMakeLists.txt +++ b/llvm/lib/Transforms/IPO/CMakeLists.txt @@ -57,6 +57,7 @@ add_llvm_component_library(LLVMipo LINK_COMPONENTS AggressiveInstCombine Analysis + AutoTuner BitReader BitWriter Core diff --git a/llvm/lib/Transforms/IPO/Inliner.cpp b/llvm/lib/Transforms/IPO/Inliner.cpp index 3e00aebce372a566bcfe2e4ecc9a4dfb9a48264a..802667819c44b6e04d4d38096de8838d40964a46 100644 --- a/llvm/lib/Transforms/IPO/Inliner.cpp +++ b/llvm/lib/Transforms/IPO/Inliner.cpp @@ -64,6 +64,9 @@ #include #include #include +#if defined(ENABLE_AUTOTUNER) +#include "llvm/AutoTuner/AutoTuning.h" +#endif using namespace llvm; @@ -298,6 +301,27 @@ PreservedAnalyses InlinerPass::run(LazyCallGraph::SCC &InitialC, // be deleted as a batch after inlining. 
SmallVector DeadFunctionsInComdats; +#if defined(ENABLE_AUTOTUNER) + bool IsAutoTunerEnabled = + autotuning::Engine.isEnabled() && + autotuning::Engine.isTuningAllowedForType(autotuning::CallSite); + if (IsAutoTunerEnabled) { + SmallVector, 16> CallsCopy = Calls; + for (int I = 0; I < (int)CallsCopy.size(); ++I) { + CallBase &CB = *CallsCopy[I].first; + DebugLoc DLoc = CB.getDebugLoc(); + if (!CB.getCaller() || !CB.getCalledFunction() || !DLoc) + continue; + autotuning::CallSiteLocation Loc = autotuning::CallSiteLocation{ + &CB, CB.getCaller(), CB.getCalledFunction(), + autotuning::SourceLocation{DLoc->getFilename().str(), DLoc->getLine(), + DLoc->getColumn()}}; + autotuning::Engine.insertCallSiteLoc(Loc); + } + autotuning::Engine.cleanCallSiteLoc(); + } +#endif + // Loop forward over all of the calls. Note that we cannot cache the size as // inlining can introduce new calls that need to be processed. for (int I = 0; I < (int)Calls.size(); ++I) { @@ -412,6 +436,13 @@ PreservedAnalyses InlinerPass::run(LazyCallGraph::SCC &InitialC, if (NewCallee) { if (!NewCallee->isDeclaration()) { Calls.push_back({ICB, NewHistoryID}); +#if defined(ENABLE_AUTOTUNER) + if (IsAutoTunerEnabled) + if (ICB->getDebugLoc()) + autotuning::Engine.updateCallSiteLocs( + CB, ICB, ICB->getCalledFunction(), + ICB->getDebugLoc()->getLine()); +#endif // Continually inlining through an SCC can result in huge compile // times and bloated code since we arbitrarily stop at some point // when the inliner decides it's not profitable to inline anymore. @@ -527,6 +558,11 @@ PreservedAnalyses InlinerPass::run(LazyCallGraph::SCC &InitialC, FAM.invalidate(F, PreservedAnalyses::none()); } +#if defined(ENABLE_AUTOTUNER) + if (IsAutoTunerEnabled) + autotuning::Engine.clearCallSiteLocs(); +#endif + // We must ensure that we only delete functions with comdats if every function // in the comdat is going to be deleted. 
if (!DeadFunctionsInComdats.empty()) { diff --git a/llvm/lib/Transforms/IPO/SampleProfile.cpp b/llvm/lib/Transforms/IPO/SampleProfile.cpp index a53baecd4776dbd8a28910c01aa0d478baa17b65..9590cf625c640b25138d13ee8508a807c78c6fb3 100644 --- a/llvm/lib/Transforms/IPO/SampleProfile.cpp +++ b/llvm/lib/Transforms/IPO/SampleProfile.cpp @@ -1212,6 +1212,20 @@ bool SampleProfileLoader::inlineHotFunctions( } } } +#if defined(ENABLE_AUTOTUNER) + if (autotuning::Engine.isEnabled()) { + // If a callsite is hot/cold, mark its corresponding callee as + // hot/cold respectively so that auto-tuning engine will be able to + // selectively dump code regions as tuning opportunities. + if (const CallInst *CI = dyn_cast(&I)) + if (Function *Callee = CI->getCalledFunction()) { + if (callsiteIsHot(FS, PSI, ProfAccForSymsInList)) + Callee->getATEFunction().setHot(); + else + Callee->getATEFunction().setCold(); + } + } +#endif } if (Hot || ExternalInlineAdvisor) { CIS.insert(CIS.begin(), AllCandidates.begin(), AllCandidates.end()); diff --git a/llvm/lib/Transforms/Instrumentation/CMakeLists.txt b/llvm/lib/Transforms/Instrumentation/CMakeLists.txt index 424f1d433606771dfc27b76dc37810392e01d5e0..955353944b141ac82f908abb99d0ba1ff9835111 100644 --- a/llvm/lib/Transforms/Instrumentation/CMakeLists.txt +++ b/llvm/lib/Transforms/Instrumentation/CMakeLists.txt @@ -30,6 +30,7 @@ add_llvm_component_library(LLVMInstrumentation LINK_COMPONENTS Analysis + AutoTuner Core Demangle MC diff --git a/llvm/lib/Transforms/Instrumentation/PGOInstrumentation.cpp b/llvm/lib/Transforms/Instrumentation/PGOInstrumentation.cpp index 3c8f25d73c623d2c11996ca14c1628b92074699f..b9459b59e70455eca6a4840082fe1a5eea02e08f 100644 --- a/llvm/lib/Transforms/Instrumentation/PGOInstrumentation.cpp +++ b/llvm/lib/Transforms/Instrumentation/PGOInstrumentation.cpp @@ -2132,6 +2132,10 @@ static bool annotateAllFunctions( F->addFnAttr(Attribute::InlineHint); LLVM_DEBUG(dbgs() << "Set inline attribute to function: " << F->getName() << 
"\n"); +#if defined(ENABLE_AUTOTUNER) + if (autotuning::Engine.isEnabled()) + F->getATEFunction().setHot(); +#endif } for (auto &F : ColdFunctions) { // Only set when there is no Attribute::Hot set by the user. For Hot @@ -2148,6 +2152,10 @@ static bool annotateAllFunctions( F->addFnAttr(Attribute::Cold); LLVM_DEBUG(dbgs() << "Set cold attribute to function: " << F->getName() << "\n"); +#if defined(ENABLE_AUTOTUNER) + if (autotuning::Engine.isEnabled()) + F->getATEFunction().setCold(); +#endif } return true; } diff --git a/llvm/lib/Transforms/Scalar/AutoTuningCompile.cpp b/llvm/lib/Transforms/Scalar/AutoTuningCompile.cpp new file mode 100644 index 0000000000000000000000000000000000000000..c33cb7cfc256a17faf12484ea73fb77e7bdc197d --- /dev/null +++ b/llvm/lib/Transforms/Scalar/AutoTuningCompile.cpp @@ -0,0 +1,334 @@ +#if defined(ENABLE_AUTOTUNER) +//===--------------- AutoTuningCompile.cpp - Auto-Tuning ------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +// Copyright (C) 2017-2022, Huawei Technologies Co., Ltd. All rights reserved. +// +//===----------------------------------------------------------------------===// +// +/// \file +/// This pass implements incremental compilation for AutoTuner to reduce the +/// compilation time for tuning process. +/// This pass performs 2 operations. +/// 1. Writing module level IR files which can be used in subsequent +/// compilations for AutoTuner flow. So clang frontend don't have to process +/// the source code from scratch. +/// 2. Add/Remove attributes for modules and functions to enable/disable +/// execution of optimization pass(es). It further reduces the compilation +/// time by skipping optimization pass(es) (If feasible). 
+// +//===----------------------------------------------------------------------===// + +#include "llvm/Transforms/Scalar/AutoTuningCompile.h" +#include "llvm/Analysis/AutotuningDump.h" +#include "llvm/AutoTuner/AutoTuning.h" +#include "llvm/InitializePasses.h" +#include "llvm/Support/CommandLine.h" +#include "llvm/Transforms/Scalar.h" +#include + +// Enable debug messages for AutoTuning Compilation. +#define DEBUG_TYPE "autotuning-compile" + +using namespace llvm; + +extern cl::opt AutoTuningCompileMode; + +AutoTuningOptPassGate SkipPasses = AutoTuningOptPassGate(true); +AutoTuningOptPassGate RunPasses = AutoTuningOptPassGate(false); +bool AutoTuningCompileModule::SkipCompilation = false; + +static void writeFiles(Module &M, std::string Pass) { + if (autotuning::Engine.isGenerateOutput()) { + switch (AutoTuningCompileMode) { + case Basic: + case CoarseGrain: + if (Pass == autotuning::CompileOptionStart) { + LLVM_DEBUG(dbgs() << "AutoTuningCompile: IR files writing before Pass: " + << Pass << ".\n"); + AutotuningDumpLegacy ATD(/* Incremental Compilation */ true); + ATD.runOnModule(M); + } + break; + case FineGrain: + if (autotuning::Engine.hasOpportunities()) { + LLVM_DEBUG(dbgs() << "AutoTuningCompile: IR files writing before Pass: " + << Pass << ".\n"); + AutotuningDumpLegacy ATD(/* Incremental Compilation */ true); + ATD.runOnModule(M); + } + break; + default: + llvm_unreachable("AutoTuningCompile: Unknown AutoTuner Incremental " + "Compilation mode.\n"); + } + } +} + +bool AutoTuningOptPassGate::shouldRunPass(const StringRef PassName, + StringRef IRDescription) { + LLVM_DEBUG(dbgs() << "Skip pass '" << PassName + << "': " << (Skip ?
"True" : "False") << '\n'); + return !Skip; +} + +bool AutoTuningOptPassGate::checkPass(const StringRef PassName, + const StringRef TargetDesc) { + if (PassName.startswith("AutoTuningCompile")) { + LLVM_DEBUG(dbgs() << "Running '" << PassName << "'pass.\n"); + return true; + } + + LLVM_DEBUG(dbgs() << "Skip pass '" << PassName + << "': " << (Skip ? "True" : "False") << '\n'); + return !Skip; +} + +AutoTuningCompileModule::AutoTuningCompileModule(std::string Pass) { + this->Pass = Pass; +} + +void AutoTuningCompileModule::writeIRFiles(Module &M) const { + writeFiles(M, Pass); +} + +bool AutoTuningCompileModule::modifyCompilationPipeline(Module &M) const { + bool Changed = false; + LLVM_DEBUG(dbgs() << "AutoTuningCompile: Deciding to enable/disable " + "optimization of module/functions. Pass: " + << Pass << '\n'); + + StringRef Filename = M.getName(); + size_t Pos = Filename.rfind(".ll"); + if (Pos == StringRef::npos) { + errs() << "AutoTuningCompile: Source file is not IR (.ll) file. " + "Disabling incremental compilation.\n"; + AutoTuningCompileMode = Inactive; + return Changed; + } + Filename = Filename.substr(0, Pos); + + switch (AutoTuningCompileMode) { + case Basic: + case CoarseGrain: + LLVM_DEBUG(dbgs() << "AutoTuningCompile: No change in opt pipeline for " + "Basic/CoarseGrain incremental compilation mode.\n"); + break; + case FineGrain: { + if (Pass == autotuning::CompileOptionStart) { + M.getContext().setOptPassGate(SkipPasses); + getAutoTuningOptPassGate().setSkip(true); + setSkipCompilation(true); + LLVM_DEBUG(dbgs() << "AutoTuningCompile: SkipPasses enabled.\n"); + } else if (getSkipCompilation() && + (autotuning::Engine.shouldRunOptPass(Filename.str(), Pass) || + Pass == "end")) { + M.getContext().setOptPassGate(RunPasses); + getAutoTuningOptPassGate().setSkip(false); + setSkipCompilation(false); + LLVM_DEBUG(dbgs() << "AutoTuningCompile: SkipPasses disabled.\n"); + } else + LLVM_DEBUG(dbgs() << "AutoTuningCompile: Old decision (SkipPasses = " + << 
(getSkipCompilation() ? "True" : "False") + << " ) continued.\n"); + + Changed = true; + break; + } + default: + llvm_unreachable( + "AutoTuningCompile: Unknown AutoTuner Incremental Compilation mode.\n"); + } + + return Changed; +} + +bool AutoTuningCompileModule::run(Module &M) { + bool Changed = false; + if (AutoTuningCompileMode == Inactive) + return Changed; + + if (!autotuning::Engine.isEnabled()) { + LLVM_DEBUG(dbgs() << "AutoTuningCompile: AutoTuner is not enabled.\n"); + return Changed; + } + + writeIRFiles(M); + + if (autotuning::Engine.isParseInput()) + Changed |= modifyCompilationPipeline(M); + + return Changed; +} + +AutoTuningCompileModuleLegacy::AutoTuningCompileModuleLegacy(std::string Pass) + : ModulePass(AutoTuningCompileModuleLegacy::ID) { + this->Pass = Pass; +} + +bool AutoTuningCompileModuleLegacy::runOnModule(Module &M) { + AutoTuningCompileModule Impl(Pass); + return Impl.run(M); +} + +char AutoTuningCompileModuleLegacy::ID = 0; + +StringRef AutoTuningCompileModuleLegacy::getPassName() const { + return "AutoTuner Incremental Compilation"; +} + +INITIALIZE_PASS(AutoTuningCompileModuleLegacy, "autotuning-compile-module", + "AutoTuner Incremental Compilation", false, false) + +// Public interface to the AutoTuningCompile pass +ModulePass *llvm::createAutoTuningCompileModuleLegacyPass(std::string Pass) { + return new AutoTuningCompileModuleLegacy(Pass); +} + +PreservedAnalyses AutoTuningCompileModulePass::run(Module &M, + ModuleAnalysisManager &) { + AutoTuningCompileModule Impl(Pass); + Impl.run(M); + return PreservedAnalyses::all(); +} + +AutoTuningCompileFunction::AutoTuningCompileFunction(std::string Pass) { + this->Pass = Pass; +} + +void AutoTuningCompileFunction::writeIRFiles(Module &M) { + if (IsModuleWritten) + return; + IsModuleWritten = true; + writeFiles(M, Pass); +} + +bool AutoTuningCompileFunction::modifyCompilationPipeline(Function &F) { + bool Changed = false; + LLVM_DEBUG(dbgs() << "AutoTuningCompile: Deciding to enable/disable 
" + "optimization of module/functions. Pass: " + << Pass << '\n'); + Module *M = F.getParent(); + StringRef Filename = M->getName(); + size_t Pos = Filename.rfind(".ll"); + if (Pos == StringRef::npos) { + errs() << "AutoTuningCompile: Source file is not IR (.ll) file. " + "Disabling incremental compilation.\n"; + AutoTuningCompileMode = Inactive; + return Changed; + } + Filename = Filename.substr(0, Pos); + + switch (AutoTuningCompileMode) { + case Basic: + case CoarseGrain: + LLVM_DEBUG(dbgs() << "AutoTuningCompile: No change in opt pipeline for " + "Basic/CoarseGrain incremental compilation mode.\n"); + break; + case FineGrain: { + if (!AutoTuningCompileModule::getSkipCompilation() && + Pass == autotuning::CompileOptionStart) { + if (!SkipDecision) { + M->getContext().setOptPassGate(SkipPasses); + getAutoTuningOptPassGate().setSkip(true); + SkipDecision = true; + } + AutoTuningCompileModule::setSkipCompilation(true); + LLVM_DEBUG(dbgs() << "AutoTuningCompile: SkipPasses enabled.\n"); + } else if (AutoTuningCompileModule::getSkipCompilation() && + Pass != autotuning::CompileOptionStart && + (autotuning::Engine.shouldRunOptPass(Filename.str(), Pass) || + Pass == autotuning::CompileOptionEnd)) { + M->getContext().setOptPassGate(RunPasses); + getAutoTuningOptPassGate().setSkip(false); + SkipDecision = false; + AutoTuningCompileModule::setSkipCompilation(false); + LLVM_DEBUG(dbgs() << "AutoTuningCompile: SkipPasses disabled.\n"); + } else + LLVM_DEBUG(dbgs() << "AutoTuningCompile: Old decision (SkipPasses = " + << (AutoTuningCompileModule::getSkipCompilation() + ? 
"True" + : "False") + << " ) continued.\n"); + + Changed = true; + break; + } + default: + llvm_unreachable( + "AutoTuningCompile: Unknown AutoTuner Incremental Compilation mode.\n"); + } + + return Changed; +} + +bool AutoTuningCompileFunction::run(Function &F) { + bool Changed = false; + if (AutoTuningCompileMode == Inactive) + return Changed; + + if (!autotuning::Engine.isEnabled()) { + LLVM_DEBUG(dbgs() << "AutoTuningCompile: AutoTuner is not enabled.\n"); + return Changed; + } + + writeIRFiles(*F.getParent()); + + if (autotuning::Engine.isParseInput()) + Changed |= modifyCompilationPipeline(F); + + return Changed; +} + +AutoTuningCompileFunctionLegacy::AutoTuningCompileFunctionLegacy( + std::string Pass) + : FunctionPass(AutoTuningCompileFunctionLegacy::ID) { + this->Pass = Pass; +} + +bool AutoTuningCompileFunctionLegacy::runOnFunction(Function &F) { + AutoTuningCompileFunction Impl(Pass); + return Impl.run(F); +} + +char AutoTuningCompileFunctionLegacy::ID = 0; + +StringRef AutoTuningCompileFunctionLegacy::getPassName() const { + return "AutoTuner Incremental Compilation"; +} + +INITIALIZE_PASS(AutoTuningCompileFunctionLegacy, "autotuning-compile-function", + "AutoTuner Incremental Compilation", false, false) + +// Public interface to the AutoTuningCompile pass +FunctionPass * +llvm::createAutoTuningCompileFunctionLegacyPass(std::string Pass) { + return new AutoTuningCompileFunctionLegacy(Pass); +} + +PreservedAnalyses +AutoTuningCompileFunctionPass::run(Function &F, FunctionAnalysisManager &AM) { + AutoTuningCompileFunction Impl(Pass); + Impl.run(F); + return PreservedAnalyses::all(); +} + +PreservedAnalyses +AutoTuningCompileLoopPass::run(Loop &L, LoopAnalysisManager &AM, + LoopStandardAnalysisResults &AR, LPMUpdater &U) { + AutoTuningCompileFunction Impl(Pass); + Function *F = L.getHeader()->getParent(); + Impl.run(*F); + return PreservedAnalyses::all(); +} + +AutoTuningOptPassGate &llvm::getAutoTuningOptPassGate() { + static AutoTuningOptPassGate 
AutoTuningGate; + return AutoTuningGate; +} + +#endif diff --git a/llvm/lib/Transforms/Scalar/CMakeLists.txt b/llvm/lib/Transforms/Scalar/CMakeLists.txt index eb008c15903a744b5685197a50598d3ae950f486..e5a82ea8f923ff5527626d1ff900ebbc565f5b29 100644 --- a/llvm/lib/Transforms/Scalar/CMakeLists.txt +++ b/llvm/lib/Transforms/Scalar/CMakeLists.txt @@ -2,6 +2,7 @@ add_llvm_component_library(LLVMScalarOpts ADCE.cpp AlignmentFromAssumptions.cpp AnnotationRemarks.cpp + AutoTuningCompile.cpp BDCE.cpp CallSiteSplitting.cpp ConstantHoisting.cpp @@ -92,6 +93,7 @@ add_llvm_component_library(LLVMScalarOpts LINK_COMPONENTS AggressiveInstCombine Analysis + AutoTuner Core InstCombine Support diff --git a/llvm/lib/Transforms/Scalar/LoopUnrollPass.cpp b/llvm/lib/Transforms/Scalar/LoopUnrollPass.cpp index 335b489d3cb25ba0fa140e4f8b32be5cfe61efed..feb8932eaae76c0fb1e91d201ed88037dd1bf652 100644 --- a/llvm/lib/Transforms/Scalar/LoopUnrollPass.cpp +++ b/llvm/lib/Transforms/Scalar/LoopUnrollPass.cpp @@ -66,6 +66,9 @@ #include #include #include +#if defined(ENABLE_AUTOTUNER) +#include "llvm/AutoTuner/AutoTuning.h" +#endif using namespace llvm; @@ -173,6 +176,10 @@ static cl::opt cl::desc("Default threshold (max size of unrolled " "loop), used in all but O3 optimizations")); +#if defined(ENABLE_AUTOTUNER) +static const std::string UnrollCountParamStr = "UnrollCount"; +#endif + /// A magic value for use with the Threshold parameter to indicate /// that the loop unroll should be performed regardless of how much /// code expansion would result. 
@@ -893,7 +900,12 @@ bool llvm::computeUnrollCount( OptimizationRemarkEmitter *ORE, unsigned TripCount, unsigned MaxTripCount, bool MaxOrZero, unsigned TripMultiple, unsigned LoopSize, TargetTransformInfo::UnrollingPreferences &UP, +#if defined(ENABLE_AUTOTUNER) + TargetTransformInfo::PeelingPreferences &PP, bool &UseUpperBound, + unsigned int Invocation) { +#else TargetTransformInfo::PeelingPreferences &PP, bool &UseUpperBound) { +#endif UnrollCostEstimator UCE(*L, LoopSize); @@ -942,6 +954,43 @@ bool llvm::computeUnrollCount( } } +#if defined(ENABLE_AUTOTUNER) + // Priority 2.5 is using Unroll Count set by AutoTuner (if enabled). + if (autotuning::Engine.isEnabled()) { + // Create a code region for current loop. This code region will be added to + // opportunity list once all the relevant information is gathered. + autotuning::Engine.initContainer(L, DEBUG_TYPE, + L->getHeader()->getParent()->getName(), + /* addOpportunity */ false, Invocation); + + int NewValue = 0; // the int value is set by lookUpParams() + bool UnrollCountChanged = L->lookUpParams("UnrollCount", NewValue); + + if (UnrollCountChanged) { + // Setting the UP.Count with the value suggested by AutoTuner. + // AutoTuner will use UnrollCount = 0, 1, X, Y, Z in case of dynamic + // configuration and UnrollCount = 0, 1, 2, 4, 8 otherwise to find + // optimal configuration. Compiler will unroll the loop with suggested + // UnrollCount except when UnrollCount = 1 where AutoTuner is suggesting + // to try loop peeling. + UP.Count = NewValue; + UP.AllowExpensiveTripCount = true; + UP.Force = true; + UP.Runtime = true; + if (!UP.AllowRemainder && UP.Count != 1) + UP.Count = 0; + + // Check for Loop Peeling + if (UP.Count == 1) { + computePeelCount(L, LoopSize, PP, TripCount, DT, SE, AC, UP.Threshold); + UP.Runtime = (PP.PeelCount) ? false : UP.Runtime; + } + + return true; + } + } +#endif + // 3rd priority is exact full unrolling. This will eliminate all copies // of some exit test. 
UP.Count = 0; @@ -1119,6 +1168,59 @@ bool llvm::computeUnrollCount( return ExplicitUnroll; } +#if defined(ENABLE_AUTOTUNER) +// Given UnrollingPreferences count (UPCount) and TripCount for CodeRegion +// CR, compute the dynamic Unroll values for tuning and add it to CR. +static void +computeAutoTunerDynamicUnrollOptions(unsigned UPCount, unsigned TripCount, + const autotuning::CodeRegion &CR) { + std::vector DynamicTuningOptions; + unsigned int PotentialTuningOptions[2]; + unsigned int Idx = 0; + int Count = -1; + unsigned int CurrentOption = 2; + unsigned int MaxTuningCount = 64; + DynamicTuningOptions.push_back(0); + // Add LoopPeeling as an additional option. + DynamicTuningOptions.push_back(1); + if (!UPCount) { + TripCount = (TripCount > MaxTuningCount) ? MaxTuningCount : TripCount; + unsigned int Limit = (TripCount == 0) ? 8 : TripCount; + DynamicTuningOptions.push_back(TripCount ? TripCount : 8); + while (CurrentOption < Limit) { + PotentialTuningOptions[Idx] = CurrentOption; + CurrentOption *= 2; + Idx = (Idx + 1) % 2; + ++Count; + } + } else { + while (CurrentOption < UPCount) { + PotentialTuningOptions[Idx] = CurrentOption; + CurrentOption *= 2; + Idx = (Idx + 1) % 2; + ++Count; + } + if (TripCount != UPCount) { + if (CurrentOption == UPCount) { + CurrentOption *= 2; + } + if (!TripCount || CurrentOption < TripCount) { + PotentialTuningOptions[Idx] = CurrentOption; + ++Count; + } + } + if (UPCount != 1) + DynamicTuningOptions.push_back(UPCount); + } + + Count = std::min(1, Count); + while (Count >= 0) + DynamicTuningOptions.push_back(PotentialTuningOptions[Count--]); + + CR.addAutoTunerOptions("UnrollCount", DynamicTuningOptions); +} +#endif + static LoopUnrollResult tryToUnrollLoop(Loop *L, DominatorTree &DT, LoopInfo *LI, ScalarEvolution &SE, const TargetTransformInfo &TTI, AssumptionCache &AC, @@ -1132,7 +1234,12 @@ tryToUnrollLoop(Loop *L, DominatorTree &DT, LoopInfo *LI, ScalarEvolution &SE, std::optional ProvidedUpperBound, std::optional 
ProvidedAllowPeeling, std::optional ProvidedAllowProfileBasedPeeling, +#if defined(ENABLE_AUTOTUNER) + std::optional ProvidedFullUnrollMaxCount, + unsigned int Invocation = 0) { +#else std::optional ProvidedFullUnrollMaxCount) { +#endif LLVM_DEBUG(dbgs() << "Loop Unroll: F[" << L->getHeader()->getParent()->getName() << "] Loop %" @@ -1276,11 +1383,28 @@ tryToUnrollLoop(Loop *L, DominatorTree &DT, LoopInfo *LI, ScalarEvolution &SE, // computeUnrollCount() decides whether it is beneficial to use upper bound to // fully unroll the loop. bool UseUpperBound = false; + +#if defined(ENABLE_AUTOTUNER) + bool IsCountSetExplicitly = computeUnrollCount( + L, TTI, DT, LI, &AC, SE, EphValues, &ORE, TripCount, MaxTripCount, + MaxOrZero, TripMultiple, LoopSize, UP, PP, UseUpperBound, Invocation); + const autotuning::CodeRegion CR = L->getCodeRegion(); + // computeAutoTunerDynamicUnrollOptions() adds the dynamic Unroll values to + // the CodeRegion. + computeAutoTunerDynamicUnrollOptions(UP.Count, TripCount, CR); + + if (!UP.Count) { + autotuning::Engine.addOpportunity( + CR, {{UnrollCountParamStr, std::to_string(UP.Count)}}); + return LoopUnrollResult::Unmodified; + } +#else bool IsCountSetExplicitly = computeUnrollCount( L, TTI, DT, LI, &AC, SE, EphValues, &ORE, TripCount, MaxTripCount, MaxOrZero, TripMultiple, LoopSize, UP, PP, UseUpperBound); if (!UP.Count) return LoopUnrollResult::Unmodified; +#endif if (PP.PeelCount) { assert(UP.Count == 1 && "Cannot perform peel and unroll in the same step"); @@ -1300,8 +1424,16 @@ tryToUnrollLoop(Loop *L, DominatorTree &DT, LoopInfo *LI, ScalarEvolution &SE, // we had, so we don't want to unroll or peel again. 
if (PP.PeelProfiledIterations) L->setLoopAlreadyUnrolled(); +#if defined(ENABLE_AUTOTUNER) + autotuning::Engine.addOpportunity( + CR, {{UnrollCountParamStr, std::to_string(UP.Count)}}); + return LoopUnrollResult::PartiallyUnrolled; + } + autotuning::Engine.addOpportunity(CR, {{UnrollCountParamStr, "0"}}); +#else return LoopUnrollResult::PartiallyUnrolled; } +#endif return LoopUnrollResult::Unmodified; } @@ -1329,8 +1461,18 @@ tryToUnrollLoop(Loop *L, DominatorTree &DT, LoopInfo *LI, ScalarEvolution &SE, {UP.Count, UP.Force, UP.Runtime, UP.AllowExpensiveTripCount, UP.UnrollRemainder, ForgetAllSCEV}, LI, &SE, &DT, &AC, &TTI, &ORE, PreserveLCSSA, &RemainderLoop); + +#if defined(ENABLE_AUTOTUNER) + if (UnrollResult == LoopUnrollResult::Unmodified) { + autotuning::Engine.addOpportunity(CR, {{UnrollCountParamStr, "0"}}); + return LoopUnrollResult::Unmodified; + } + autotuning::Engine.addOpportunity( + CR, {{UnrollCountParamStr, std::to_string(UP.Count)}}); +#else if (UnrollResult == LoopUnrollResult::Unmodified) return LoopUnrollResult::Unmodified; +#endif if (RemainderLoop) { std::optional RemainderLoopID = @@ -1379,6 +1521,20 @@ public: /// Otherwise, forgetAllLoops and rebuild when needed next. bool ForgetAllSCEV; +#if defined(ENABLE_AUTOTUNER) +private: + // 'InvocationCounter' keeps track of Invocation of Loop Unroll Pass and + // assign it to 'Invocation'. So each LoopUnroll Object knows when it is + // being invoked during optimization pipeline. It is used to identify the + // Invocation of a pass if it is invoked multiple times. AutoTuner will use + // this information to generate the Code Regions and apply the suggested + // configuration during the correct invocation of the Loop Unroll Pass. 
+ static unsigned int InvocationCounter; + unsigned int Invocation; + +public: +#endif + std::optional ProvidedCount; std::optional ProvidedThreshold; std::optional ProvidedAllowPartial; @@ -1405,6 +1561,9 @@ public: ProvidedAllowPeeling(AllowPeeling), ProvidedAllowProfileBasedPeeling(AllowProfileBasedPeeling), ProvidedFullUnrollMaxCount(ProvidedFullUnrollMaxCount) { +#if defined(ENABLE_AUTOTUNER) + Invocation = InvocationCounter++; +#endif initializeLoopUnrollPass(*PassRegistry::getPassRegistry()); } @@ -1431,7 +1590,12 @@ public: /*OnlyFullUnroll*/ false, OnlyWhenForced, ForgetAllSCEV, ProvidedCount, ProvidedThreshold, ProvidedAllowPartial, ProvidedRuntime, ProvidedUpperBound, ProvidedAllowPeeling, +#if defined(ENABLE_AUTOTUNER) + ProvidedAllowProfileBasedPeeling, ProvidedFullUnrollMaxCount, + Invocation); +#else ProvidedAllowProfileBasedPeeling, ProvidedFullUnrollMaxCount); +#endif if (Result == LoopUnrollResult::FullyUnrolled) LPM.markLoopAsDeleted(*L); @@ -1449,6 +1613,9 @@ public: getLoopAnalysisUsage(AU); } }; +#if defined(ENABLE_AUTOTUNER) +unsigned int LoopUnroll::InvocationCounter = 0; +#endif } // end anonymous namespace @@ -1496,6 +1663,11 @@ PreservedAnalyses LoopFullUnrollPass::run(Loop &L, LoopAnalysisManager &AM, std::string LoopName = std::string(L.getName()); +#if defined(ENABLE_AUTOTUNER) + // LoopFullUnrollPass will be invoked first during optimization pipeline. 
+ unsigned int Invocation = 0; +#endif + bool Changed = tryToUnrollLoop(&L, AR.DT, &AR.LI, AR.SE, AR.TTI, AR.AC, ORE, /*BFI*/ nullptr, /*PSI*/ nullptr, @@ -1505,7 +1677,12 @@ PreservedAnalyses LoopFullUnrollPass::run(Loop &L, LoopAnalysisManager &AM, /*Runtime*/ false, /*UpperBound*/ false, /*AllowPeeling*/ true, /*AllowProfileBasedPeeling*/ false, +#if defined(ENABLE_AUTOTUNER) + /*FullUnrollMaxCount*/ std::nullopt, + /*Invocation*/ Invocation) != +#else /*FullUnrollMaxCount*/ std::nullopt) != +#endif LoopUnrollResult::Unmodified; if (!Changed) return PreservedAnalyses::all(); @@ -1588,6 +1765,11 @@ PreservedAnalyses LoopUnrollPass::run(Function &F, bool Changed = false; +#if defined(ENABLE_AUTOTUNER) + // LoopUnrollPass will be invoked second during optimization pipeline. + unsigned int Invocation = 1; +#endif + // The unroller requires loops to be in simplified form, and also needs LCSSA. // Since simplification may add new inner loops, it has to run before the // legality and profitability checks. This means running the loop unroller @@ -1630,7 +1812,12 @@ PreservedAnalyses LoopUnrollPass::run(Function &F, /*Count*/ std::nullopt, /*Threshold*/ std::nullopt, UnrollOpts.AllowPartial, UnrollOpts.AllowRuntime, UnrollOpts.AllowUpperBound, LocalAllowPeeling, +#if defined(ENABLE_AUTOTUNER) + UnrollOpts.AllowProfileBasedPeeling, UnrollOpts.FullUnrollMaxCount, + Invocation); +#else UnrollOpts.AllowProfileBasedPeeling, UnrollOpts.FullUnrollMaxCount); +#endif Changed |= Result != LoopUnrollResult::Unmodified; // The parent must not be damaged by unrolling! 
diff --git a/llvm/lib/Transforms/Scalar/Scalar.cpp b/llvm/lib/Transforms/Scalar/Scalar.cpp index 37b032e4d7c7e1954291be94c0c3617aced0b5e7..4b140e8d600bff5f56fcde32d4243234336cd6c9 100644 --- a/llvm/lib/Transforms/Scalar/Scalar.cpp +++ b/llvm/lib/Transforms/Scalar/Scalar.cpp @@ -64,4 +64,8 @@ void llvm::initializeScalarOpts(PassRegistry &Registry) { initializeStraightLineStrengthReduceLegacyPassPass(Registry); initializePlaceBackedgeSafepointsLegacyPassPass(Registry); initializeLoopSimplifyCFGLegacyPassPass(Registry); +#if defined(ENABLE_AUTOTUNER) + initializeAutoTuningCompileFunctionLegacyPass(Registry); + initializeAutoTuningCompileModuleLegacyPass(Registry); +#endif } diff --git a/llvm/lib/Transforms/Scalar/Sink.cpp b/llvm/lib/Transforms/Scalar/Sink.cpp index 8b99f73b850b98c3fd2697d6de55c08aadf60f8a..b3c60686e252f824ab0e71fdb9e2a3cec9521a3e 100644 --- a/llvm/lib/Transforms/Scalar/Sink.cpp +++ b/llvm/lib/Transforms/Scalar/Sink.cpp @@ -248,6 +248,11 @@ namespace { } bool runOnFunction(Function &F) override { +#if defined(ENABLE_AUTOTUNER) + if (skipFunction(F)) + return false; +#endif + auto &DT = getAnalysis().getDomTree(); auto &LI = getAnalysis().getLoopInfo(); auto &AA = getAnalysis().getAAResults(); diff --git a/llvm/lib/Transforms/Utils/CMakeLists.txt b/llvm/lib/Transforms/Utils/CMakeLists.txt index a870071f3f641dc2b9f48c879f5a1d86f23c8682..8616e7b923c026196379468eabb60cca6adebc65 100644 --- a/llvm/lib/Transforms/Utils/CMakeLists.txt +++ b/llvm/lib/Transforms/Utils/CMakeLists.txt @@ -93,6 +93,7 @@ add_llvm_component_library(LLVMTransformUtils LINK_COMPONENTS Analysis + AutoTuner Core Support TargetParser diff --git a/llvm/lib/Transforms/Utils/LCSSA.cpp b/llvm/lib/Transforms/Utils/LCSSA.cpp index c36b0533580b97cbefe2acff971b2d30306bc6ce..20a4edcb29db66e176a1a2c390edda96df107f7a 100644 --- a/llvm/lib/Transforms/Utils/LCSSA.cpp +++ b/llvm/lib/Transforms/Utils/LCSSA.cpp @@ -491,6 +491,11 @@ char &llvm::LCSSAID = LCSSAWrapperPass::ID; /// Transform \p F into 
loop-closed SSA form. bool LCSSAWrapperPass::runOnFunction(Function &F) { +#if defined(ENABLE_AUTOTUNER) + if (skipFunction(F)) + return false; +#endif + LI = &getAnalysis().getLoopInfo(); DT = &getAnalysis().getDomTree(); auto *SEWP = getAnalysisIfAvailable(); diff --git a/llvm/lib/Transforms/Utils/LoopSimplify.cpp b/llvm/lib/Transforms/Utils/LoopSimplify.cpp index 3e604fdf2e11ac63a6c536a81dbda87c044a6b35..2e42e7f1397f3f8171b5c41d45ef1a51dae5685e 100644 --- a/llvm/lib/Transforms/Utils/LoopSimplify.cpp +++ b/llvm/lib/Transforms/Utils/LoopSimplify.cpp @@ -69,6 +69,9 @@ #include "llvm/Transforms/Utils/BasicBlockUtils.h" #include "llvm/Transforms/Utils/Local.h" #include "llvm/Transforms/Utils/LoopUtils.h" +#if defined(ENABLE_AUTOTUNER) +#include "llvm/AutoTuner/AutoTuning.h" +#endif using namespace llvm; #define DEBUG_TYPE "loop-simplify" @@ -793,6 +796,11 @@ Pass *llvm::createLoopSimplifyPass() { return new LoopSimplify(); } /// it in any convenient order) inserting preheaders... /// bool LoopSimplify::runOnFunction(Function &F) { +#if defined(ENABLE_AUTOTUNER) + if (autotuning::Engine.isEnabled() && skipFunction(F)) + return false; +#endif + bool Changed = false; LoopInfo *LI = &getAnalysis().getLoopInfo(); DominatorTree *DT = &getAnalysis().getDomTree(); diff --git a/llvm/lib/Transforms/Utils/LoopUnroll.cpp b/llvm/lib/Transforms/Utils/LoopUnroll.cpp index 511dd61308f9270a883e6206f2a9041a55984a03..2d2c3e50514b6e2ac94a917be8982d8e144815f9 100644 --- a/llvm/lib/Transforms/Utils/LoopUnroll.cpp +++ b/llvm/lib/Transforms/Utils/LoopUnroll.cpp @@ -69,6 +69,9 @@ #include #include #include +#if defined(ENABLE_AUTOTUNER) +#include "llvm/AutoTuner/AutoTuning.h" +#endif namespace llvm { class DataLayout; diff --git a/llvm/lib/Transforms/Vectorize/CMakeLists.txt b/llvm/lib/Transforms/Vectorize/CMakeLists.txt index 998dfd956575d3c1f21d71bdcd333ef20e1b24e4..f2c5c04abb134da835adc839951798190d36265c 100644 --- a/llvm/lib/Transforms/Vectorize/CMakeLists.txt +++ 
b/llvm/lib/Transforms/Vectorize/CMakeLists.txt @@ -21,6 +21,7 @@ add_llvm_component_library(LLVMVectorize LINK_COMPONENTS Analysis + AutoTuner Core Support TransformUtils diff --git a/llvm/lib/Transforms/Vectorize/LoopVectorizationLegality.cpp b/llvm/lib/Transforms/Vectorize/LoopVectorizationLegality.cpp index f923f0be6621978d06a1a718404e605aafa15937..f13ce6853666c3da8b304a0b82ff602d6a51ca16 100644 --- a/llvm/lib/Transforms/Vectorize/LoopVectorizationLegality.cpp +++ b/llvm/lib/Transforms/Vectorize/LoopVectorizationLegality.cpp @@ -113,6 +113,18 @@ LoopVectorizeHints::LoopVectorizeHints(const Loop *L, // Populate values with existing loop metadata. getHintsFromMetadata(); +#if defined(ENABLE_AUTOTUNER) + if (autotuning::Engine.isEnabled()) { + int NewValue = 0; + bool VectorizationInterleaveChanged = + L->lookUpParams("VectorizationInterleave", NewValue); + + if (VectorizationInterleaveChanged) { + Interleave.Value = NewValue; + } + } +#endif + // force-vector-interleave overrides DisableInterleaving. if (VectorizerParams::isInterleaveForced()) Interleave.Value = VectorizerParams::VectorizationInterleave; diff --git a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp index b603bbe55dc9abd28520128fc42ff02004d17f76..46fab860f5a3c90ddb1a6d82558164f409d5101d 100644 --- a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp +++ b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp @@ -10178,6 +10178,22 @@ LoopVectorizePass::LoopVectorizePass(LoopVectorizeOptions Opts) VectorizeOnlyWhenForced(Opts.VectorizeOnlyWhenForced || !EnableLoopVectorization) {} +#if defined(ENABLE_AUTOTUNER) +// Given the iterleave count (IC) and CR, compute the dynamic values for +// interleave count. Then add it to CR. 
+static void +computeAutoTunerDynamicInterleaveOptions(unsigned IC, + const autotuning::CodeRegion &CR) { + + std::vector AutoTunerOptions{1, 2, 4}; + if (std::find(AutoTunerOptions.begin(), AutoTunerOptions.end(), IC) == + AutoTunerOptions.end()) + AutoTunerOptions[2] = IC; + + CR.addAutoTunerOptions("VectorizationInterleave", AutoTunerOptions); +} +#endif + bool LoopVectorizePass::processLoop(Loop *L) { assert((EnableVPlanNativePath || L->isInnermost()) && "VPlan-native path is not enabled. Only process inner loops."); @@ -10190,6 +10206,12 @@ bool LoopVectorizePass::processLoop(Loop *L) { << L->getHeader()->getParent()->getName() << "' from " << DebugLocStr << "\n"); +#if defined(ENABLE_AUTOTUNER) + // Initialize the loop for auto-tuning but do not add it + // as an tuning opportunity yet. + autotuning::Engine.initContainer( + L, LV_NAME, L->getHeader()->getParent()->getName(), false); +#endif LoopVectorizeHints Hints(L, InterleaveOnlyWhenForced, *ORE, TTI); LLVM_DEBUG( @@ -10422,6 +10444,18 @@ bool LoopVectorizePass::processLoop(Loop *L) { InterleaveLoop = false; } +#if defined(ENABLE_AUTOTUNER) + if (!VectorizerParams::isInterleaveForced()) { + // Compute the dynamic values for VectorizationInterleave and add it to the + // CodeRegion. + computeAutoTunerDynamicInterleaveOptions(IC, L->getCodeRegion()); + + // Add the current loop as a tuning opportunity explicitly. + autotuning::Engine.addOpportunity( + L->getCodeRegion(), {{"VectorizationInterleave", std::to_string(IC)}}); + } +#endif + // Override IC if user provided an interleave count. IC = UserIC > 0 ? 
UserIC : IC; diff --git a/llvm/test/AutoTuning/AutotuningDump/Inputs/unroll_template.yaml b/llvm/test/AutoTuning/AutotuningDump/Inputs/unroll_template.yaml new file mode 100644 index 0000000000000000000000000000000000000000..f483a269906a72d13d13542767fe8d2b99c884c0 --- /dev/null +++ b/llvm/test/AutoTuning/AutotuningDump/Inputs/unroll_template.yaml @@ -0,0 +1,8 @@ +--- !AutoTuning +Pass: loop-unroll +Name: [name] +Function: foo +CodeRegionType: loop +Args: + - UnrollCount: [number] +... diff --git a/llvm/test/AutoTuning/AutotuningDump/create-data-dir.ll b/llvm/test/AutoTuning/AutotuningDump/create-data-dir.ll new file mode 100644 index 0000000000000000000000000000000000000000..ceb9b4fb2ca65c370ebf0808ce7470939f91e45a --- /dev/null +++ b/llvm/test/AutoTuning/AutotuningDump/create-data-dir.ll @@ -0,0 +1,65 @@ +; UNSUPPORTED: windows +; RUN: sed 's#\[number\]#0#g; s#\[name\]#for.body#g' \ +; RUN: %S/Inputs/unroll_template.yaml > %t.DEFAULT.yaml +; RUN: opt --disable-output %s -S -passes='require' \ +; RUN: -auto-tuning-input=%t.DEFAULT.yaml -auto-tuning-config-id=1 +; RUN: cat %T/../autotune_datadir/create-data-dir.ll/1.ll | FileCheck %s +; RUN: rm -rf %T/../autotune_datadir/* + +; RUN: cp %t.DEFAULT.yaml %T/../autotune_datadir/config.yaml +; RUN: opt %s -S -passes='require' -auto-tuning-config-id=1 +; RUN: cat %T/../autotune_datadir/create-data-dir.ll/1.ll | FileCheck %s +; RUN: rm -rf %T/../autotune_datadir/* + +; RUN: cp %t.DEFAULT.yaml %T/../autotune_datadir/config.yaml +; RUN: opt %s -S -passes='require' -enable-autotuning-dump +; RUN: echo -n %T/../autotune_datadir/IR_files/ > %t.filename +; RUN: echo -n "create-data-dir.ll/" >> %t.filename +; RUN: echo -n %s | sed 's#/#_#g' >> %t.filename +; RUN: echo -n ".ll" >> %t.filename +; RUN: cat %t.filename | xargs cat | FileCheck %s +; RUN: rm -rf %T/../autotune_datadir + +; ModuleID = 'search.c' +source_filename = "search.c" +target datalayout = "e-m:e-i8:8:32-i16:16:32-i64:64-i128:128-n32:64-S128" +target triple = 
"aarch64-unknown-linux-gnu" + +; Function Attrs: argmemonly nofree norecurse nosync nounwind readonly uwtable +define dso_local i32 @search(ptr nocapture noundef readonly %Arr, i32 noundef %Value, i32 noundef %Size) { +entry: + %cmp5 = icmp sgt i32 %Size, 0 + br i1 %cmp5, label %for.body.preheader, label %for.end + +for.body.preheader: ; preds = %entry + %wide.trip.count = zext i32 %Size to i64 + br label %for.body + +for.body: ; preds = %for.body.preheader, %for.inc + %indvars.iv = phi i64 [ 0, %for.body.preheader ], [ %indvars.iv.next, %for.inc ] + %arrayidx = getelementptr inbounds i32, ptr %Arr, i64 %indvars.iv + %0 = load i32, ptr %arrayidx, align 4 + %cmp1 = icmp eq i32 %0, %Value + br i1 %cmp1, label %for.end.loopexit.split.loop.exit, label %for.inc + +for.inc: ; preds = %for.body + %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1 + %exitcond.not = icmp eq i64 %indvars.iv.next, %wide.trip.count + br i1 %exitcond.not, label %for.end, label %for.body + +for.end.loopexit.split.loop.exit: ; preds = %for.body + %1 = trunc i64 %indvars.iv to i32 + br label %for.end + +for.end: ; preds = %for.inc, %for.end.loopexit.split.loop.exit, %entry + %Idx.0.lcssa = phi i32 [ 0, %entry ], [ %1, %for.end.loopexit.split.loop.exit ], [ %Size, %for.inc ] + ret i32 %Idx.0.lcssa +} + +; Check that only loop body is inside the IR File. 
+; CHECK-LABEL: for.body: ; preds = +; CHECK-NEXT: %indvars.iv = phi i64 [ 0, %for.body.preheader ], [ %indvars.iv.next, %for.inc ] +; CHECK-NEXT: %arrayidx = getelementptr inbounds i32, ptr %Arr, i64 %indvars.iv +; CHECK-NEXT: %0 = load i32, ptr %arrayidx, align 4 +; CHECK-NEXT: %cmp1 = icmp eq i32 %0, %Value +; CHECK-NEXT: br i1 %cmp1, label %for.end.loopexit.split.loop.exit, label %for.inc diff --git a/llvm/test/AutoTuning/AutotuningDump/unroll.ll b/llvm/test/AutoTuning/AutotuningDump/unroll.ll new file mode 100644 index 0000000000000000000000000000000000000000..e8243da55fffdcd06b4a01ac3e34816d90329f1e --- /dev/null +++ b/llvm/test/AutoTuning/AutotuningDump/unroll.ll @@ -0,0 +1,35 @@ +; RUN: rm -rf %T.tmp/Output +; RUN: mkdir -p %T.tmp/Output +; RUN: rm %t.DEFAULT.yaml -rf +; RUN: sed 's#\[number\]#0#g; s#\[name\]#for.body#g' %S/Inputs/unroll_template.yaml > %t.DEFAULT.yaml +; RUN: env AUTOTUNE_DATADIR=%T.tmp/Output opt %s -S -passes='require' \ +; RUN: -auto-tuning-input=%t.DEFAULT.yaml -auto-tuning-config-id=1 +; RUN: env AUTOTUNE_DATADIR=%T.tmp/Output opt %s -S -passes='require' \ +; RUN: -auto-tuning-input=%t.DEFAULT.yaml -auto-tuning-config-id=2 +; RUN: cat %T.tmp/Output/unroll.ll/1.ll | FileCheck %s -check-prefix=DEFAULT +; RUN: cat %T.tmp/Output/unroll.ll/2.ll | FileCheck %s -check-prefix=DEFAULT +; UNSUPPORTED: windows + +define void @foo(i32* nocapture %a) { +entry: + br label %for.body +for.body: ; preds = %for.body, %entry + %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ] + %arrayidx = getelementptr inbounds i32, i32* %a, i64 %indvars.iv + %0 = load i32, i32* %arrayidx, align 4 + %inc = add nsw i32 %0, 1 + store i32 %inc, i32* %arrayidx, align 4 + %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1 + %exitcond = icmp eq i64 %indvars.iv.next, 64 + br i1 %exitcond, label %for.end, label %for.body +for.end: ; preds = %for.body + ret void +} +; Check that only loop body is inside the IR File. 
+; DEFAULT-LABEL: for.body: ; preds = %for.body, %entry +; DEFAULT-NEXT: %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ] +; DEFAULT-NEXT: %arrayidx = getelementptr inbounds i32, ptr %a, i64 %indvars.iv +; DEFAULT: %exitcond = icmp eq i64 %indvars.iv.next, 64 +; DEFAULT: br i1 %exitcond, label %for.end, label %for.body + +; RUN: rm -rf %T.tmp/Output diff --git a/llvm/test/AutoTuning/BaselineConfig/Inputs/autotune_datadir/baseline_config.yaml b/llvm/test/AutoTuning/BaselineConfig/Inputs/autotune_datadir/baseline_config.yaml new file mode 100644 index 0000000000000000000000000000000000000000..a5e669c17a71bbade9008dea70948949eb86a090 --- /dev/null +++ b/llvm/test/AutoTuning/BaselineConfig/Inputs/autotune_datadir/baseline_config.yaml @@ -0,0 +1,9 @@ +!AutoTuning {Args: [{UnrollCount: 0}], CodeRegionHash: 12835463591102937421, + CodeRegionType: loop, Function: test, Invocation: 0, Name: for.body, + Pass: loop-unroll} +--- !AutoTuning {Args: [{VectorizationInterleave: 2}], + CodeRegionHash: 12835463591102937421, CodeRegionType: loop, Function: test, + Invocation: 0, Name: for.body, Pass: loop-vectorize} +--- !AutoTuning {Args: [{UnrollCount: 0}], CodeRegionHash: 8430337282115614432, + CodeRegionType: loop, Function: test, Invocation: 1, Name: vector.body, + Pass: loop-unroll} diff --git a/llvm/test/AutoTuning/BaselineConfig/Inputs/autotune_datadir/random_config.yaml b/llvm/test/AutoTuning/BaselineConfig/Inputs/autotune_datadir/random_config.yaml new file mode 100644 index 0000000000000000000000000000000000000000..738cf55ffe9a2510add48a219cedaa551b297a32 --- /dev/null +++ b/llvm/test/AutoTuning/BaselineConfig/Inputs/autotune_datadir/random_config.yaml @@ -0,0 +1,9 @@ +!AutoTuning {Args: [{UnrollCount: 2}], CodeRegionHash: 12835463591102937421, + CodeRegionType: loop, Function: test, Invocation: 0, Name: for.body, + Pass: loop-unroll} +--- !AutoTuning {Args: [{VectorizationInterleave: 2}], + CodeRegionHash: 12835463591102937421, CodeRegionType: loop, 
Function: test, + Invocation: 0, Name: for.body, Pass: loop-vectorize} +--- !AutoTuning {Args: [{UnrollCount: 0}], CodeRegionHash: 8430337282115614432, + CodeRegionType: loop, Function: test, Invocation: 1, Name: vector.body, + Pass: loop-unroll} diff --git a/llvm/test/AutoTuning/BaselineConfig/Inputs/test.ll b/llvm/test/AutoTuning/BaselineConfig/Inputs/test.ll new file mode 100644 index 0000000000000000000000000000000000000000..667a076b2d23e278c13947419d5788008d223b59 --- /dev/null +++ b/llvm/test/AutoTuning/BaselineConfig/Inputs/test.ll @@ -0,0 +1,117 @@ +; ModuleID = 'test.c' +source_filename = "test.c" +target datalayout = "e-m:e-i8:8:32-i16:16:32-i64:64-i128:128-n32:64-S128" +target triple = "aarch64-unknown-linux-gnu" + +@.str = private unnamed_addr constant [12 x i8] c"tmp <= 10.0\00", align 1 +@.str.1 = private unnamed_addr constant [7 x i8] c"test.c\00", align 1 +@__PRETTY_FUNCTION__.test = private unnamed_addr constant [12 x i8] c"void test()\00", align 1 + +; Function Attrs: nounwind uwtable +define dso_local void @test() #0 { +entry: + %cs = alloca i32, align 4 + %flush = alloca ptr, align 8 + %i = alloca i32, align 4 + %tmp = alloca double, align 8 + call void @llvm.lifetime.start.p0(i64 4, ptr %cs) #5 + store i32 16431360, ptr %cs, align 4, !tbaa !6 + call void @llvm.lifetime.start.p0(i64 8, ptr %flush) #5 + %0 = load i32, ptr %cs, align 4, !tbaa !6 + %conv = sext i32 %0 to i64 + %call = call noalias ptr @calloc(i64 noundef %conv, i64 noundef 8) #6 + store ptr %call, ptr %flush, align 8, !tbaa !10 + call void @llvm.lifetime.start.p0(i64 4, ptr %i) #5 + call void @llvm.lifetime.start.p0(i64 8, ptr %tmp) #5 + store double 0.000000e+00, ptr %tmp, align 8, !tbaa !12 + store i32 0, ptr %i, align 4, !tbaa !6 + br label %for.cond + +for.cond: ; preds = %for.inc, %entry + %1 = load i32, ptr %i, align 4, !tbaa !6 + %2 = load i32, ptr %cs, align 4, !tbaa !6 + %cmp = icmp slt i32 %1, %2 + br i1 %cmp, label %for.body, label %for.end + +for.body: ; preds = 
%for.cond + %3 = load ptr, ptr %flush, align 8, !tbaa !10 + %4 = load i32, ptr %i, align 4, !tbaa !6 + %idxprom = sext i32 %4 to i64 + %arrayidx = getelementptr inbounds double, ptr %3, i64 %idxprom + %5 = load double, ptr %arrayidx, align 8, !tbaa !12 + %6 = load double, ptr %tmp, align 8, !tbaa !12 + %add = fadd double %6, %5 + store double %add, ptr %tmp, align 8, !tbaa !12 + br label %for.inc + +for.inc: ; preds = %for.body + %7 = load i32, ptr %i, align 4, !tbaa !6 + %inc = add nsw i32 %7, 1 + store i32 %inc, ptr %i, align 4, !tbaa !6 + br label %for.cond, !llvm.loop !14 + +for.end: ; preds = %for.cond + %8 = load double, ptr %tmp, align 8, !tbaa !12 + %cmp2 = fcmp ole double %8, 1.000000e+01 + br i1 %cmp2, label %if.then, label %if.else + +if.then: ; preds = %for.end + br label %if.end + +if.else: ; preds = %for.end + call void @__assert_fail(ptr noundef @.str, ptr noundef @.str.1, i32 noundef 11, ptr noundef @__PRETTY_FUNCTION__.test) #7 + unreachable + +if.end: ; preds = %if.then + %9 = load ptr, ptr %flush, align 8, !tbaa !10 + call void @free(ptr noundef %9) #5 + call void @llvm.lifetime.end.p0(i64 8, ptr %tmp) #5 + call void @llvm.lifetime.end.p0(i64 4, ptr %i) #5 + call void @llvm.lifetime.end.p0(i64 8, ptr %flush) #5 + call void @llvm.lifetime.end.p0(i64 4, ptr %cs) #5 + ret void +} + +; Function Attrs: nocallback nofree nosync nounwind willreturn memory(argmem: readwrite) +declare void @llvm.lifetime.start.p0(i64 immarg, ptr nocapture) #1 + +; Function Attrs: nounwind allocsize(0,1) +declare noalias ptr @calloc(i64 noundef, i64 noundef) #2 + +; Function Attrs: noreturn nounwind +declare void @__assert_fail(ptr noundef, ptr noundef, i32 noundef, ptr noundef) #3 + +; Function Attrs: nounwind +declare void @free(ptr noundef) #4 + +; Function Attrs: nocallback nofree nosync nounwind willreturn memory(argmem: readwrite) +declare void @llvm.lifetime.end.p0(i64 immarg, ptr nocapture) #1 + +attributes #0 = { nounwind uwtable "frame-pointer"="non-leaf" 
"no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="generic" "target-features"="+fp-armv8,+neon,+outline-atomics,+v8a,-fmv" } +attributes #1 = { nocallback nofree nosync nounwind willreturn memory(argmem: readwrite) } +attributes #2 = { nounwind allocsize(0,1) "frame-pointer"="non-leaf" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="generic" "target-features"="+fp-armv8,+neon,+outline-atomics,+v8a,-fmv" } +attributes #3 = { noreturn nounwind "frame-pointer"="non-leaf" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="generic" "target-features"="+fp-armv8,+neon,+outline-atomics,+v8a,-fmv" } +attributes #4 = { nounwind "frame-pointer"="non-leaf" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="generic" "target-features"="+fp-armv8,+neon,+outline-atomics,+v8a,-fmv" } +attributes #5 = { nounwind } +attributes #6 = { nounwind allocsize(0,1) } +attributes #7 = { noreturn nounwind } + +!llvm.module.flags = !{!0, !1, !2, !3, !4} +!llvm.ident = !{!5} + +!0 = !{i32 1, !"wchar_size", i32 4} +!1 = !{i32 8, !"PIC Level", i32 2} +!2 = !{i32 7, !"PIE Level", i32 2} +!3 = !{i32 7, !"uwtable", i32 2} +!4 = !{i32 7, !"frame-pointer", i32 1} +!5 = !{!"Huawei BiSheng Compiler clang version 18.0.0 (ssh://git@codehub-dg-y.huawei.com:2222/CompilerKernel/BiShengKernel/BiSheng.git 026024071a7fb66b26b65fb81da702cc5f0cf405)"} +!6 = !{!7, !7, i64 0} +!7 = !{!"int", !8, i64 0} +!8 = !{!"omnipotent char", !9, i64 0} +!9 = !{!"Simple C/C++ TBAA"} +!10 = !{!11, !11, i64 0} +!11 = !{!"any pointer", !8, i64 0} +!12 = !{!13, !13, i64 0} +!13 = !{!"double", !8, i64 0} +!14 = distinct !{!14, !15} +!15 = !{!"llvm.loop.mustprogress"} diff --git a/llvm/test/AutoTuning/BaselineConfig/apply_baseline_config.ll b/llvm/test/AutoTuning/BaselineConfig/apply_baseline_config.ll new file mode 100644 index 0000000000000000000000000000000000000000..f905208a2f3bb7291f1b36177629c0fc2e7319cc --- /dev/null +++ 
b/llvm/test/AutoTuning/BaselineConfig/apply_baseline_config.ll @@ -0,0 +1,11 @@ +; The purpose is to test the baseline IR is the same as the 1st iteration of +; autotuning process with --use-baseline-config enabled. +; RUN: rm %t.baseline %t.firstIt -f +; RUN: opt -O3 %S/Inputs/test.ll -o %t.baseline +; RUN: opt -O3 %S/Inputs/test.ll -o %t.firstIt_baseline \ +; RUN: -auto-tuning-input=%S/Inputs/autotune_datadir/baseline_config.yaml +; RUN: cmp %t.firstIt_baseline %t.baseline + +; RUN: opt -O3 %S/Inputs/test.ll -o %t.firstIt_random \ +; RUN: -auto-tuning-input=%S/Inputs/autotune_datadir/random_config.yaml +; RUN: not cmp %t.firstIt_random %t.baseline diff --git a/llvm/test/AutoTuning/BaselineConfig/opp.ll b/llvm/test/AutoTuning/BaselineConfig/opp.ll new file mode 100644 index 0000000000000000000000000000000000000000..b2897316fc220fca7e3afdeb5adcbb7fcfc35290 --- /dev/null +++ b/llvm/test/AutoTuning/BaselineConfig/opp.ll @@ -0,0 +1,67 @@ +; REQUIRES: asserts +; RUN: rm %t.callsite_opp -rf +; RUN: opt %s -O3 -debug-only=inline -disable-output -S 2>&1 | \ +; RUN: FileCheck %s -check-prefix=DEFAULT +; RUN: opt %s -O3 -auto-tuning-opp=%t.callsite_opp -disable-output -S 2>&1 +; RUN: FileCheck %s --input-file %t.callsite_opp/opp.ll.yaml -check-prefix=AUTOTUNE + +@a = global i32 4 + +; Function Attrs: nounwind readnone uwtable +define i32 @simpleFunction(i32 %a) #0 { +entry: + call void @extern() + %a1 = load volatile i32, i32* @a + %x1 = add i32 %a1, %a1 + %a2 = load volatile i32, i32* @a + %x2 = add i32 %x1, %a2 + %a3 = load volatile i32, i32* @a + %x3 = add i32 %x2, %a3 + %a4 = load volatile i32, i32* @a + %x4 = add i32 %x3, %a4 + %a5 = load volatile i32, i32* @a + %x5 = add i32 %x4, %a5 + %a6 = load volatile i32, i32* @a + %x6 = add i32 %x5, %a6 + %a7 = load volatile i32, i32* @a + %x7 = add i32 %x6, %a6 + %a8 = load volatile i32, i32* @a + %x8 = add i32 %x7, %a8 + %a9 = load volatile i32, i32* @a + %x9 = add i32 %x8, %a9 + %a10 = load volatile i32, i32* @a + %x10 = add 
i32 %x9, %a10 + %a11 = load volatile i32, i32* @a + %x11 = add i32 %x10, %a11 + %a12 = load volatile i32, i32* @a + %x12 = add i32 %x11, %a12 + %add = add i32 %x12, %a + ret i32 %add +} + +; Function Attrs: nounwind readnone uwtable +define i32 @bar(i32 %a) #0 { +entry: + %0 = tail call i32 @simpleFunction(i32 6) + ret i32 %0 +} + +declare void @extern() + +attributes #0 = { nounwind readnone uwtable } +attributes #1 = { nounwind cold readnone uwtable } + + +; NOTE: Need to make sure the function inlining has the same behaviour as O3 and +; 'BaselineConfig' +; DEFAULT: Inlining calls in: bar +; DEFAULT: Inlining (cost=115, threshold=375), Call: %0 = tail call i32 @simpleFunction(i32 6) + +; AUTOTUNE: Pass: inline +; AUTOTUNE-NEXT: Name: simpleFunction +; AUTOTUNE-NEXT: Function: bar +; AUTOTUNE-NEXT: CodeRegionType: callsite +; AUTOTUNE-NEXT: CodeRegionHash: {{[0-9]+}} +; AUTOTUNE-NEXT: DynamicConfigs: { ForceInline: [ 0, 1 ] } +; AUTOTUNE-NEXT: BaselineConfig: { ForceInline: '1' } +; AUTOTUNE-NEXT: Invocation: 0 diff --git a/llvm/test/AutoTuning/CodeRegionFilter/function-filtering.ll b/llvm/test/AutoTuning/CodeRegionFilter/function-filtering.ll new file mode 100644 index 0000000000000000000000000000000000000000..13acafae6fc49d2698f78b1dd364e4930f226e74 --- /dev/null +++ b/llvm/test/AutoTuning/CodeRegionFilter/function-filtering.ll @@ -0,0 +1,62 @@ +; REQUIRES: asserts + +; RUN: rm -rf %t.filter +; RUN: opt %s -S -passes='function(require,loop-unroll),cgscc(inline)' \ +; RUN: -auto-tuning-opp=%t.filter -auto-tuning-type-filter=CallSite,Loop --disable-output +; RUN: FileCheck %s --input-file %t.filter/function-filtering.ll.yaml -check-prefix=DEFAULT + +; RUN: rm -rf %t.filter +; RUN: opt %s -S -passes='function(require,loop-unroll),cgscc(inline)' \ +; RUN: -auto-tuning-opp=%t.filter -auto-tuning-type-filter=CallSite,Loop \ +; RUN: -auto-tuning-function-filter=foo --disable-output +; RUN: FileCheck %s --input-file %t.filter/function-filtering.ll.yaml
-check-prefix=FILTER_FOO + +; RUN: rm -rf %t.filter +; RUN: opt %s -S -passes='function(require,loop-unroll),cgscc(inline)' \ +; RUN: -auto-tuning-opp=%t.filter -auto-tuning-type-filter=CallSite,Loop \ +; RUN: -auto-tuning-function-filter=bar --disable-output +; RUN: FileCheck %s --input-file %t.filter/function-filtering.ll.yaml -check-prefix=FILTER_BAR + +; RUN: rm -rf %t.filter +; RUN: opt %s -S -passes='function(require,loop-unroll),cgscc(inline)' \ +; RUN: -auto-tuning-opp=%t.filter -auto-tuning-type-filter=CallSite,Loop \ +; RUN: -auto-tuning-function-filter=dummy -debug-only=autotuning | \ +; RUN: FileCheck %s -check-prefix=FILTER_DUMMY + +define void @foo(i32* nocapture %a) { +entry: + br label %for.body + +for.body: ; preds = %for.body, %entry + %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ] + %arrayidx = getelementptr inbounds i32, i32* %a, i64 %indvars.iv + %0 = load i32, i32* %arrayidx, align 4 + %inc = add nsw i32 %0, 1 + store i32 %inc, i32* %arrayidx, align 4 + %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1 + %exitcond = icmp eq i64 %indvars.iv.next, 64 + br i1 %exitcond, label %for.end, label %for.body + +for.end: ; preds = %for.body + ret void +} + +define void @bar(i32* nocapture %a) { +entry: + call void @foo(i32* %a) + ret void +} + +; DEFAULT: --- !AutoTuning +; DEFAULT: --- !AutoTuning + +; FILTER_FOO: --- !AutoTuning +; FILTER_FOO: Function: foo +; FILTER_FOO-NOT: --- !AutoTuning + +; FILTER_BAR: --- !AutoTuning +; FILTER_BAR: Function: bar +; FILTER_BAR-NOT: --- !AutoTuning + +; FILTER_DUMMY-NOT: --- !AutoTuning +; FILTER_DUMMY-NOT: --- !AutoTuning diff --git a/llvm/test/AutoTuning/Error/Inputs/invalid-format.yaml b/llvm/test/AutoTuning/Error/Inputs/invalid-format.yaml new file mode 100644 index 0000000000000000000000000000000000000000..9c203e58f0abdfd25348496753108bfad2797320 --- /dev/null +++ b/llvm/test/AutoTuning/Error/Inputs/invalid-format.yaml @@ -0,0 +1,3 @@ + + this is a xml file + diff --git 
a/llvm/test/AutoTuning/Error/Inputs/template.yaml b/llvm/test/AutoTuning/Error/Inputs/template.yaml new file mode 100644 index 0000000000000000000000000000000000000000..1f02b52ffb387fe2ccd565e7dc8bb497fdd1bac1 --- /dev/null +++ b/llvm/test/AutoTuning/Error/Inputs/template.yaml @@ -0,0 +1,10 @@ +--- !AutoTuning +Pass: pass +Name: for.body +Function: foo +CodeRegionType: loop +CodeRegionHash: 0 +Args: + - UnrollCount: 2 + - PassOrder: [test, test2] +... diff --git a/llvm/test/AutoTuning/Error/file-not-found-error.ll b/llvm/test/AutoTuning/Error/file-not-found-error.ll new file mode 100644 index 0000000000000000000000000000000000000000..6a364239a27138e08a8a6dabd8200209d94e5e63 --- /dev/null +++ b/llvm/test/AutoTuning/Error/file-not-found-error.ll @@ -0,0 +1,29 @@ +; RUN: rm %t.non-existing.yaml -rf +; RUN: not opt %s -S -passes='require,loop(loop-unroll-full)' \ +; RUN: -auto-tuning-input=%t.non-existing.yaml 2>&1 | \ +; RUN: FileCheck %s -check-prefix=ERROR + +; UNSUPPORTED: windows + +define void @foo(i32* nocapture %a) { +entry: + br label %for.body + +for.body: ; preds = %for.body, %entry + %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ] + %arrayidx = getelementptr inbounds i32, i32* %a, i64 %indvars.iv + %0 = load i32, i32* %arrayidx, align 4 + %inc = add nsw i32 %0, 1 + store i32 %inc, i32* %arrayidx, align 4 + %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1 + %exitcond = icmp eq i64 %indvars.iv.next, 64 + br i1 %exitcond, label %for.end, label %for.body + +for.end: ; preds = %for.body + ret void +} + +; check if error message is shown properly when input yaml is not found +; +; ERROR: Error parsing auto-tuning input.
+; ERROR: No such file or directory diff --git a/llvm/test/AutoTuning/Error/invalid-yaml-error.ll b/llvm/test/AutoTuning/Error/invalid-yaml-error.ll new file mode 100644 index 0000000000000000000000000000000000000000..bfc8784c4ea4ba358149d5d6ceffd33077c8106f --- /dev/null +++ b/llvm/test/AutoTuning/Error/invalid-yaml-error.ll @@ -0,0 +1,27 @@ +; RUN: not opt %s -S -passes='require,loop(loop-unroll-full)' \ +; RUN: -auto-tuning-input=%S/Inputs/invalid-format.yaml 2>&1 | \ +; RUN: FileCheck %s -check-prefix=ERROR + +; UNSUPPORTED: windows + +define void @foo(i32* nocapture %a) { +entry: + br label %for.body + +for.body: ; preds = %for.body, %entry + %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ] + %arrayidx = getelementptr inbounds i32, i32* %a, i64 %indvars.iv + %0 = load i32, i32* %arrayidx, align 4 + %inc = add nsw i32 %0, 1 + store i32 %inc, i32* %arrayidx, align 4 + %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1 + %exitcond = icmp eq i64 %indvars.iv.next, 64 + br i1 %exitcond, label %for.end, label %for.body + +for.end: ; preds = %for.body + ret void +} + +; check if error message is shown properly when input yaml is in invalid format +; +; ERROR: error: YAML:1:1: error: document root is not of mapping type. diff --git a/llvm/test/AutoTuning/Error/malformed-input-error.ll b/llvm/test/AutoTuning/Error/malformed-input-error.ll new file mode 100644 index 0000000000000000000000000000000000000000..0b73c31955032f538e452045aabb571098a9b171 --- /dev/null +++ b/llvm/test/AutoTuning/Error/malformed-input-error.ll @@ -0,0 +1,136 @@ +; Check if error messages are shown properly for malformed YAML files.
+ +; Missing Pass Field +; RUN: rm %t.missing-pass.yaml -rf +; RUN: sed 's#Pass: pass##g' %S/Inputs/template.yaml > %t.missing-pass.yaml +; RUN: not opt %s -S -passes='require,loop(loop-unroll-full)' \ +; RUN: -auto-tuning-input=%t.missing-pass.yaml 2>&1 | \ +; RUN: FileCheck %s -check-prefix=ERROR-FIELD + +; Missing Pass Value +; RUN: rm %t.missing-value-pass.yaml -rf +; RUN: sed 's#pass##g' %S/Inputs/template.yaml > %t.missing-value-pass.yaml +; RUN: not opt %s -S -passes='require,loop(loop-unroll-full)' \ +; RUN: -auto-tuning-input=%t.missing-value-pass.yaml 2>&1 | \ +; RUN: FileCheck %s -check-prefix=ERROR-PASS-VALUE + +; Missing Name Field +; RUN: rm %t.missing-name.yaml -rf +; RUN: sed 's#Name: for.body##g' %S/Inputs/template.yaml > %t.missing-name.yaml +; RUN: not opt %s -S -passes='require,loop(loop-unroll-full)' \ +; RUN: -auto-tuning-input=%t.missing-name.yaml 2>&1 | \ +; RUN: FileCheck %s -check-prefix=ERROR-NAME-FIELD + +; Missing Name Value +; RUN: rm %t.missing-value-name.yaml -rf +; RUN: sed 's#for.body##g' %S/Inputs/template.yaml > %t.missing-value-name.yaml +; RUN: not opt %s -S -passes='require,loop(loop-unroll-full)' \ +; RUN: -auto-tuning-input=%t.missing-value-name.yaml 2>&1 | \ +; RUN: FileCheck %s -check-prefix=ERROR-NAME-VALUE + +; Missing Function Field +; RUN: rm %t.missing-function.yaml -rf +; RUN: sed 's#Function: foo##g' %S/Inputs/template.yaml > %t.missing-function.yaml +; RUN: not opt %s -S -passes='require,loop(loop-unroll-full)' -auto-tuning-input=%t.missing-function.yaml 2>&1 | FileCheck %s -check-prefix=ERROR-FUNCTION-FIELD + +; Missing Function Value +; RUN: rm %t.missing-value-func.yaml -rf +; RUN: sed 's#foo##g' %S/Inputs/template.yaml > %t.missing-value-func.yaml +; RUN: not opt %s -S -passes='require,loop(loop-unroll-full)' \ +; RUN: -auto-tuning-input=%t.missing-value-func.yaml 2>&1 | \ +; RUN: FileCheck %s -check-prefix=ERROR-FUNC-VALUE + +; Missing CodeRegionType Field +; RUN: rm %t.missing-type.yaml -rf +; RUN: sed 
's#CodeRegionType: loop##g' %S/Inputs/template.yaml > %t.missing-type.yaml +; RUN: not opt %s -S -passes='require,loop(loop-unroll-full)' \ +; RUN: -auto-tuning-input=%t.missing-type.yaml 2>&1 | \ +; RUN: FileCheck %s -check-prefix=ERROR-CODE-REGION-TYPE-FIELD + +; Missing CodeRegionType Value +; RUN: rm %t.missing-value-type.yaml -rf +; RUN: sed 's#loop##g' %S/Inputs/template.yaml > %t.missing-value-type.yaml +; RUN: not opt %s -S -passes='require,loop(loop-unroll-full)' \ +; RUN: -auto-tuning-input=%t.missing-value-type.yaml 2>&1 | \ +; RUN: FileCheck %s -check-prefix=ERROR-CODE-REGION-TYPE-VALUE + +; Invalid CodeRegionType Value +; RUN: rm %t.invalid-value-type.yaml -rf +; RUN: sed 's#loop#error-type#g' %S/Inputs/template.yaml > %t.invalid-value-type.yaml +; RUN: not opt %s -S -passes='require,loop(loop-unroll-full)' \ +; RUN: -auto-tuning-input=%t.invalid-value-type.yaml 2>&1 | \ +; RUN: FileCheck %s -check-prefix=ERROR-CODE-REGION-TYPE-INVALID + +; Missing Param Name +; RUN: rm %t.missing-param-name.yaml -rf +; RUN: sed 's#UnrollCount##g' %S/Inputs/template.yaml > %t.missing-param-name.yaml +; RUN: not opt %s -S -passes='require,loop(loop-unroll-full)' \ +; RUN: -auto-tuning-input=%t.missing-param-name.yaml 2>&1 | \ +; RUN: FileCheck %s -check-prefix=ERROR-PARAM-NAME + +; Missing Param Value +; RUN: rm %t.missing-value-param.yaml -rf +; RUN: sed 's#2##g' %S/Inputs/template.yaml > %t.missing-value-param.yaml +; RUN: not opt %s -S -passes='require,loop(loop-unroll-full)' \ +; RUN: -auto-tuning-input=%t.missing-value-param.yaml 2>&1 | \ +; RUN: FileCheck %s -check-prefix=ERROR-PARAM-VALUE + +; Empty Param List +; RUN: rm %t.empty-value-param-list.yaml -rf +; RUN: sed 's#\[test, test2\]#\[\]#g' %S/Inputs/template.yaml > %t.empty-value-param-list.yaml +; RUN: opt %s -S -passes='require,loop(loop-unroll-full)' \ +; RUN: -auto-tuning-input=%t.empty-value-param-list.yaml 2>&1 | \ +; RUN: FileCheck %s -check-prefix=VALID + +; UNSUPPORTED: windows + +define void 
@foo(i32* nocapture %a) { +entry: + br label %for.body + +for.body: ; preds = %for.body, %entry + %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ] + %arrayidx = getelementptr inbounds i32, i32* %a, i64 %indvars.iv + %0 = load i32, i32* %arrayidx, align 4 + %inc = add nsw i32 %0, 1 + store i32 %inc, i32* %arrayidx, align 4 + %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1 + %exitcond = icmp eq i64 %indvars.iv.next, 64 + br i1 %exitcond, label %for.end, label %for.body + +for.end: ; preds = %for.body + ret void +} + +; check if error message is shown properly for malformed YAML input files. +; + +; ERROR-FIELD: error: CodeRegionHash, CodeRegionType, or Pass missing. + +; ERROR-NAME-FIELD: error: Remark Name expected; enable -autotuning-omit-metadata. + +; ERROR-FUNCTION-FIELD: error: Remark Function Name expected; enable -autotuning-omit-metadata. + +; ERROR-PASS-VALUE: error: YAML:2:1: error: expected a value of scalar type. +; ERROR-PASS-VALUE: Pass: + +; ERROR-NAME-VALUE: error: YAML:3:1: error: expected a value of scalar type. +; ERROR-NAME-VALUE: Name: + +; ERROR-FUNC-VALUE: error: YAML:4:1: error: expected a value of scalar type. +; ERROR-FUNC-VALUE: Function: + +; ERROR-CODE-REGION-TYPE-FIELD: CodeRegionHash, CodeRegionType, or Pass missing. + +; ERROR-CODE-REGION-TYPE-VALUE: error: YAML:5:1: error: expected a value of scalar type. +; ERROR-CODE-REGION-TYPE-VALUE: CodeRegionType: + +; ERROR-CODE-REGION-TYPE-INVALID: Unsupported CodeRegionType:error-type + +; ERROR-PARAM-NAME: error: YAML:8:5: error: argument key is missing. +; ERROR-PARAM-NAME: - : 2 + +; ERROR-PARAM-VALUE: error: YAML:8:5: error: expected a value of scalar type. +; ERROR-PARAM-VALUE: - UnrollCount: + +; VALID-NOT: -auto-tuning-input=(input file) option failed.
diff --git a/llvm/test/AutoTuning/Error/output-error.ll b/llvm/test/AutoTuning/Error/output-error.ll new file mode 100644 index 0000000000000000000000000000000000000000..61ffba50924b084383e6e571d7ea40d2e2052ca3 --- /dev/null +++ b/llvm/test/AutoTuning/Error/output-error.ll @@ -0,0 +1,28 @@ +; RUN: rm %t.opp -rf; touch %t.opp +; RUN: not opt %s -S -passes='require,loop(loop-unroll-full)' \ +; RUN: -auto-tuning-opp=%t.opp 2>&1 | FileCheck %s -check-prefix=ERROR-OPP + +; UNSUPPORTED: windows + +define void @foo(i32* nocapture %a) { +entry: + br label %for.body + +for.body: ; preds = %for.body, %entry + %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ] + %arrayidx = getelementptr inbounds i32, i32* %a, i64 %indvars.iv + %0 = load i32, i32* %arrayidx, align 4 + %inc = add nsw i32 %0, 1 + store i32 %inc, i32* %arrayidx, align 4 + %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1 + %exitcond = icmp eq i64 %indvars.iv.next, 64 + br i1 %exitcond, label %for.end, label %for.body + +for.end: ; preds = %for.body + ret void +} + +; check if error message is shown properly when output files cannot be created +; +; ERROR-OPP: Error generating auto-tuning opportunities.
+; ERROR-OPP: error: Not a directory diff --git a/llvm/test/AutoTuning/Error/valid-input.ll b/llvm/test/AutoTuning/Error/valid-input.ll new file mode 100644 index 0000000000000000000000000000000000000000..dae90cdbe4081a8459440a4c8686151a1dbfb1ac --- /dev/null +++ b/llvm/test/AutoTuning/Error/valid-input.ll @@ -0,0 +1,27 @@ +; RUN: opt %s -S -passes='require,loop(loop-unroll-full)' \ +; RUN: -auto-tuning-input=%S/Inputs/template.yaml 2>&1 | \ +; RUN: FileCheck %s -check-prefix=VALID +; UNSUPPORTED: windows + +define void @foo(i32* nocapture %a) { +entry: + br label %for.body + +for.body: ; preds = %for.body, %entry + %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ] + %arrayidx = getelementptr inbounds i32, i32* %a, i64 %indvars.iv + %0 = load i32, i32* %arrayidx, align 4 + %inc = add nsw i32 %0, 1 + store i32 %inc, i32* %arrayidx, align 4 + %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1 + %exitcond = icmp eq i64 %indvars.iv.next, 64 + br i1 %exitcond, label %for.end, label %for.body + +for.end: ; preds = %for.body + ret void +} + +; check that no error message is shown when the input is valid +; + +; VALID-NOT: -auto-tuning-input=(input file) option failed. diff --git a/llvm/test/AutoTuning/IncrementalCompilation/Inputs/template.yaml b/llvm/test/AutoTuning/IncrementalCompilation/Inputs/template.yaml new file mode 100644 index 0000000000000000000000000000000000000000..a7d390be63e73072f866117269b707d8b1734b93 --- /dev/null +++ b/llvm/test/AutoTuning/IncrementalCompilation/Inputs/template.yaml @@ -0,0 +1,9 @@ +--- !AutoTuning +Pass: [dummy-pass] +CodeRegionType: [dummy-type] +Name: foo +DebugLoc: { File: [dummy-file], Line: 0, Column: 0 } +Function: foo +CodeRegionHash: 0 +Invocation: 0 +...
diff --git a/llvm/test/AutoTuning/IncrementalCompilation/inc-compile-parse-input.ll b/llvm/test/AutoTuning/IncrementalCompilation/inc-compile-parse-input.ll new file mode 100644 index 0000000000000000000000000000000000000000..b9dc81089d40d0f0db753e6a59e71f242ef8244a --- /dev/null +++ b/llvm/test/AutoTuning/IncrementalCompilation/inc-compile-parse-input.ll @@ -0,0 +1,103 @@ +; REQUIRES: asserts +; RUN: rm %t.output -rf +; RUN: rm %t.inc_compile.yaml -rf +; RUN: sed 's#\[dummy-pass\]#inline#g' %S/Inputs/template.yaml > %t.temp.yaml +; RUN: sed 's#\[dummy-type\]#callsite#g' %t.temp.yaml > %t.temp2.yaml +; RUN: sed 's#\[dummy-file\]#%s#g' %t.temp2.yaml > %t.inc_compile.yaml +; RUN: opt -O3 %s -auto-tuning-input=%t.inc_compile.yaml \ +; RUN: -auto-tuning-compile-mode=CoarseGrain -print-after-all \ +; RUN: -debug-only=autotuning-compile \ +; RUN: -o %t.output 2>&1 | \ +; RUN: FileCheck %s -check-prefix=COARSEGRAIN + +; RUN: rm %t.output -rf +; RUN: rm %t.inc_compile.yaml -rf +; RUN: sed 's#\[dummy-pass\]#inline#g' %S/Inputs/template.yaml > %t.temp.yaml +; RUN: sed 's#\[dummy-type\]#callsite#g' %t.temp.yaml > %t.temp2.yaml +; RUN: sed 's#\[dummy-file\]#%s#g' %t.temp2.yaml > %t.inc_compile.yaml +; RUN: opt -O3 %s -auto-tuning-input=%t.inc_compile.yaml \ +; RUN: -auto-tuning-compile-mode=FineGrain -print-after-all \ +; RUN: -debug-only=autotuning-compile \ +; RUN: -o %t.output 2>&1 | \ +; RUN: FileCheck %s -check-prefixes=FINEGRAIN-1,FINEGRAIN-INLINE + +; RUN: rm %t.output -rf +; RUN: rm %t.inc_compile.yaml -rf +; RUN: sed 's#\[dummy-pass\]#loop-unroll#g' %S/Inputs/template.yaml > %t.temp.yaml +; RUN: sed 's#\[dummy-type\]#loop#g' %t.temp.yaml > %t.temp2.yaml +; RUN: sed 's#\[dummy-file\]#%s#g' %t.temp2.yaml > %t.inc_compile.yaml +; RUN: opt -O3 %s -auto-tuning-input=%t.inc_compile.yaml \ +; RUN: -auto-tuning-compile-mode=FineGrain -print-after-all \ +; RUN: -debug-only=autotuning-compile \ +; RUN: -o %t.output 2>&1 | \ +; RUN: FileCheck %s 
-check-prefixes=FINEGRAIN-1,FINEGRAIN-2,FINEGRAIN-UNROLL + +; ModuleID = 'test.c' +source_filename = "test.c" +target datalayout = "e-m:e-i8:8:32-i16:16:32-i64:64-i128:128-n32:64-S128" +target triple = "aarch64-unknown-linux-gnu" + +; Function Attrs: argmemonly nofree norecurse nosync nounwind uwtable +define dso_local i32 @test(i32* nocapture noundef %a, i32* nocapture noundef readonly %b, i32 noundef %size) local_unnamed_addr #0 { +entry: + %cmp11 = icmp sgt i32 %size, 0 + br i1 %cmp11, label %for.body.preheader, label %for.cond.cleanup + +for.body.preheader: ; preds = %entry + %wide.trip.count = zext i32 %size to i64 + br label %for.body + +for.cond.cleanup: ; preds = %for.body, %entry + ret i32 undef + +for.body: ; preds = %for.body.preheader, %for.body + %indvars.iv = phi i64 [ 0, %for.body.preheader ], [ %indvars.iv.next, %for.body ] + %arrayidx = getelementptr inbounds i32, i32* %b, i64 %indvars.iv + %0 = load i32, i32* %arrayidx, align 4 + %arrayidx2 = getelementptr inbounds i32, i32* %a, i64 %indvars.iv + %1 = load i32, i32* %arrayidx2, align 4 + %add = add nsw i32 %1, %0 + store i32 %add, i32* %arrayidx2, align 4 + %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1 + %exitcond.not = icmp eq i64 %indvars.iv.next, %wide.trip.count + br i1 %exitcond.not, label %for.cond.cleanup, label %for.body +} + +attributes #0 = { argmemonly nofree norecurse nosync nounwind uwtable "frame-pointer"="non-leaf" "min-legal-vector-width"="0" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="generic" "target-features"="+neon,+v8a" } + +!llvm.dbg.cu = !{!0} +!llvm.module.flags = !{!3, !4, !5, !6, !7, !8} +!llvm.ident = !{!9} + +!0 = distinct !DICompileUnit(language: DW_LANG_C99, file: !1, producer: "Huawei BiSheng Compiler clang version 12.0.0 (1c7b819ced36)", isOptimized: true, runtimeVersion: 0, emissionKind: LineTablesOnly, enums: !2, splitDebugInlining: false, nameTableKind: None) +!1 = !DIFile(filename: "test.c", directory: 
"/home/m00629332/code/autoTuner") +!2 = !{} +!3 = !{i32 2, !"Debug Info Version", i32 3} +!4 = !{i32 1, !"wchar_size", i32 4} +!5 = !{i32 1, !"branch-target-enforcement", i32 0} +!6 = !{i32 1, !"sign-return-address", i32 0} +!7 = !{i32 1, !"sign-return-address-all", i32 0} +!8 = !{i32 1, !"sign-return-address-with-bkey", i32 0} +!9 = !{!"Huawei BiSheng Compiler clang version 12.0.0 (1c7b819ced36)"} +!10 = distinct !DISubprogram(name: "dummy", scope: !1, file: !1, line: 1, type: !11, scopeLine: 1, spFlags: DISPFlagDefinition | DISPFlagOptimized, unit: !0, retainedNodes: !2) +!11 = !DISubroutineType(types: !2) +!12 = !DILocation(line: 2, column: 5, scope: !10) + +; COARSEGRAIN: AutoTuningCompile: Deciding to enable/disable optimization of module/functions. Pass: start +; COARSEGRAIN-NEXT: AutoTuningCompile: No change in opt pipeline for Basic/CoarseGrain incremental compilation mode. +; COARSEGRAIN-NOT: Skip pass {{.*}}: True + +; FINEGRAIN-1: AutoTuningCompile: Deciding to enable/disable optimization of module/functions. Pass: start +; FINEGRAIN-1-NEXT: AutoTuningCompile: SkipPasses enabled. +; FINEGRAIN-1-NOT: Skip pass {{.*}}: False +; FINEGRAIN-1: AutoTuningCompile: Deciding to enable/disable optimization of module/functions. Pass: inline +; FINEGRAIN-INLINE: AutoTuningCompile: SkipPasses disabled. +; FINEGRAIN-INLINE: Skip pass 'InlinerPass': False +; FINEGRAIN-INLINE-NEXT: *** IR Dump After InlinerPass +; FINEGRAIN-INLINE-NOT: Skip pass {{.*}}: True + +; FINEGRAIN-2: AutoTuningCompile: Old decision (SkipPasses = True ) continued. +; FINEGRAIN-2-NOT: Skip pass {{.*}}: False +; FINEGRAIN-2: AutoTuningCompile: Deciding to enable/disable optimization of module/functions. Pass: loop-unroll +; FINEGRAIN-UNROLL: AutoTuningCompile: SkipPasses disabled. 
+; FINEGRAIN-UNROLL-NOT: Skip pass {{.*}}: True diff --git a/llvm/test/AutoTuning/Inline/Inputs/template.yaml b/llvm/test/AutoTuning/Inline/Inputs/template.yaml new file mode 100644 index 0000000000000000000000000000000000000000..e04612183d1f8d7f1898a5b6f623d1a6ce8c53b6 --- /dev/null +++ b/llvm/test/AutoTuning/Inline/Inputs/template.yaml @@ -0,0 +1,9 @@ +--- !AutoTuning +Pass: inline +Name: simpleFunction-entry +Function: bar +CodeRegionType: callsite +CodeRegionHash: 5550568187071847048 +Args: + - ForceInline: [force-inline] +... diff --git a/llvm/test/AutoTuning/Inline/Inputs/template_no_metadata.yaml b/llvm/test/AutoTuning/Inline/Inputs/template_no_metadata.yaml new file mode 100644 index 0000000000000000000000000000000000000000..9fc88f56d6bc1d246f7e1676b446a66621bfe890 --- /dev/null +++ b/llvm/test/AutoTuning/Inline/Inputs/template_no_metadata.yaml @@ -0,0 +1,7 @@ +--- !AutoTuning +Pass: inline +CodeRegionType: callsite +CodeRegionHash: 5550568187071847048 +Args: + - ForceInline: [force-inline] +... 
diff --git a/llvm/test/AutoTuning/Inline/duplicate-calls.ll b/llvm/test/AutoTuning/Inline/duplicate-calls.ll new file mode 100644 index 0000000000000000000000000000000000000000..ad32262ad04456b69376b3fa81355caacf435ecd --- /dev/null +++ b/llvm/test/AutoTuning/Inline/duplicate-calls.ll @@ -0,0 +1,96 @@ +; RUN: rm %t.duplicate_calls -rf +; RUN: opt %s -S -passes='cgscc(inline)' -auto-tuning-opp=%t.duplicate_calls \ +; RUN: -auto-tuning-type-filter=CallSite --disable-output +; RUN: FileCheck %s --input-file %t.duplicate_calls/duplicate-calls.ll.yaml + +; ModuleID = 'duplicate-calls.c' +source_filename = "duplicate-calls.c" +target datalayout = "e-m:e-i8:8:32-i16:16:32-i64:64-i128:128-n32:64-S128" +target triple = "aarch64-unknown-linux-gnu" + +; Function Attrs: nounwind uwtable +define dso_local void @bar(i32* nocapture %result, i32* %cfb, i32 %bytes) local_unnamed_addr #0 !dbg !10 { +entry: + %call = tail call i32 @test(i32* %cfb, i32 %bytes) #1, !dbg !12 + store i32 %call, i32* %result, align 4, !dbg !13, !tbaa !14 + ret void, !dbg !18 +} + +declare dso_local i32 @test(i32*, i32) local_unnamed_addr #0 + +; Function Attrs: nounwind uwtable +define dso_local void @foo(i32* %cfb, i32* readnone %saved, i32* nocapture %result, i32 %bytes) local_unnamed_addr #0 !dbg !19 { +entry: + %tobool.not = icmp eq i32* %cfb, null, !dbg !20 + br i1 %tobool.not, label %if.else, label %if.then.split, !dbg !20 + +if.then.split: ; preds = %entry + tail call void @bar(i32* %result, i32* nonnull %cfb, i32 %bytes), !dbg !21 + br label %return, !dbg !22 + +if.else: ; preds = %entry + %tobool1.not = icmp eq i32* %saved, null, !dbg !23 + br i1 %tobool1.not, label %if.else.split, label %return, !dbg !23 + +if.else.split: ; preds = %if.else + tail call void @bar(i32* %result, i32* null, i32 %bytes), !dbg !21 + br label %return, !dbg !23 + +return: ; preds = %if.then.split, %if.else.split, %if.else + ret void, !dbg !24 +} + +attributes #0 = { nounwind uwtable "disable-tail-calls"="false" 
"frame-pointer"="non-leaf" "less-precise-fpmad"="false" "min-legal-vector-width"="0" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="generic" "target-features"="+neon" "unsafe-fp-math"="false" "use-soft-float"="false" } +attributes #1 = { nounwind } + +!llvm.dbg.cu = !{!0} +!llvm.module.flags = !{!3, !4, !5, !6, !7, !8} +!llvm.ident = !{!9} + +!0 = distinct !DICompileUnit(language: DW_LANG_C99, file: !1, producer: "Huawei BiSheng Compiler clang version 12.0.0 (clang-0d5d71fe6c22 flang-8b17fc131076)", isOptimized: true, runtimeVersion: 0, emissionKind: LineTablesOnly, enums: !2, splitDebugInlining: false, nameTableKind: None) +!1 = !DIFile(filename: "duplicate-calls.c", directory: "/home/m00629332/benchmarks/cBench/source/security_pgp_d/src") +!2 = !{} +!3 = !{i32 2, !"Debug Info Version", i32 3} +!4 = !{i32 1, !"wchar_size", i32 4} +!5 = !{i32 1, !"branch-target-enforcement", i32 0} +!6 = !{i32 1, !"sign-return-address", i32 0} +!7 = !{i32 1, !"sign-return-address-all", i32 0} +!8 = !{i32 1, !"sign-return-address-with-bkey", i32 0} +!9 = !{!"Huawei BiSheng Compiler clang version 12.0.0 (clang-0d5d71fe6c22 flang-8b17fc131076)"} +!10 = distinct !DISubprogram(name: "bar", scope: !1, file: !1, line: 7, type: !11, scopeLine: 8, flags: DIFlagPrototyped, spFlags: DISPFlagDefinition | DISPFlagOptimized, unit: !0, retainedNodes: !2) +!11 = !DISubroutineType(types: !2) +!12 = !DILocation(line: 10, column: 16, scope: !10) +!13 = !DILocation(line: 10, column: 14, scope: !10) +!14 = !{!15, !15, i64 0} +!15 = !{!"int", !16, i64 0} +!16 = !{!"omnipotent char", !17, i64 0} +!17 = !{!"Simple C/C++ TBAA"} +!18 = !DILocation(line: 14, column: 1, scope: !10) +!19 = distinct !DISubprogram(name: "foo", scope: !1, file: !1, line: 17, type: !11, scopeLine: 18, flags: DIFlagPrototyped, spFlags: DISPFlagDefinition | DISPFlagOptimized, unit: !0, 
retainedNodes: !2) +!20 = !DILocation(line: 22, column: 6, scope: !19) +!21 = !DILocation(line: 27, column: 2, scope: !19) +!22 = !DILocation(line: 23, column: 3, scope: !19) +!23 = !DILocation(line: 24, column: 11, scope: !19) +!24 = !DILocation(line: 28, column: 1, scope: !19) + +; CHECK: --- !AutoTuning +; CHECK-NEXT: Pass: inline +; CHECK-NEXT: Name: bar-if.then.split +; CHECK-NEXT: DebugLoc: { File: duplicate-calls.c, Line: 27, Column: 2 } +; CHECK-NEXT: Function: foo +; CHECK-NEXT: CodeRegionType: callsite +; CHECK-NEXT: CodeRegionHash: +; CHECK-NEXT: DynamicConfigs: { ForceInline: [ 0, 1 ] } +; CHECK-NEXT: BaselineConfig: { ForceInline: '1' } +; CHECK-NEXT: Invocation: 0 +; CHECK-NEXT: ... +; CHECK-NEXT: --- !AutoTuning +; CHECK-NEXT: Pass: inline +; CHECK-NEXT: Name: bar-if.else.split +; CHECK-NEXT: DebugLoc: { File: duplicate-calls.c, Line: 27, Column: 2 } +; CHECK-NEXT: Function: foo +; CHECK-NEXT: CodeRegionType: callsite +; CHECK-NEXT: CodeRegionHash: +; CHECK-NEXT: DynamicConfigs: { ForceInline: [ 0, 1 ] } +; CHECK-NEXT: BaselineConfig: { ForceInline: '1' } +; CHECK-NEXT: Invocation: 0 diff --git a/llvm/test/AutoTuning/Inline/force-inline.ll b/llvm/test/AutoTuning/Inline/force-inline.ll new file mode 100644 index 0000000000000000000000000000000000000000..cedfc8df3483ac411adccd838d6fbae5dbd0a806 --- /dev/null +++ b/llvm/test/AutoTuning/Inline/force-inline.ll @@ -0,0 +1,84 @@ +; REQUIRES: asserts +; RUN: opt < %s -passes=inline -debug-only=inline -disable-output -S 2>&1 | FileCheck %s -check-prefix=DEFAULT +; simpleFunction will be inlined with the default behavior. 
+ +; RUN: rm %t.force-inline.yaml -rf +; RUN: sed 's#\[force-inline\]#true#g' %S/Inputs/template.yaml > %t.force-inline.yaml +; RUN: opt %s -passes=inline -debug-only=inline -disable-output -S \ +; RUN: -auto-tuning-input=%t.force-inline.yaml 2>&1 | \ +; RUN: FileCheck %s -check-prefix=FORCE-INLINE +; Test with ForceInline=true; + +; RUN: rm %t.force-inline.yaml -rf +; RUN: sed 's#\[force-inline\]#true#g' %S/Inputs/template_no_metadata.yaml > %t.force-inline.yaml +; RUN: opt %s -passes=inline -S -auto-tuning-input=%t.force-inline.yaml \ +; RUN: -debug-only=inline -disable-output -auto-tuning-omit-metadata 2>&1 | \ +; RUN: FileCheck %s -check-prefix=FORCE-INLINE +; Test with ForceInline=true; + +; RUN: rm %t.no-inline.yaml -rf +; RUN: sed 's#\[force-inline\]#false#g' %S/Inputs/template.yaml > %t.no-inline.yaml +; RUN: opt %s -passes=inline -debug-only=inline -disable-output -S \ +; RUN: -auto-tuning-input=%t.no-inline.yaml 2>&1 | \ +; RUN: FileCheck %s -check-prefix=NO-INLINE +; Test with ForceInline=false; + +; RUN: rm %t.no-inline.yaml -rf +; RUN: sed 's#\[force-inline\]#false#g' %S/Inputs/template_no_metadata.yaml > %t.no-inline.yaml +; RUN: opt %s -passes='cgscc(inline)' -debug-only=inline -disable-output -S \ +; RUN: -auto-tuning-input=%t.no-inline.yaml -auto-tuning-omit-metadata 2>&1 | \ +; RUN: FileCheck %s -check-prefix=NO-INLINE +; Test with ForceInline=false; + +@a = global i32 4 + +; Function Attrs: nounwind readnone uwtable +define i32 @simpleFunction(i32 %a) #0 { +entry: + call void @extern() + %a1 = load volatile i32, i32* @a + %x1 = add i32 %a1, %a1 + %a2 = load volatile i32, i32* @a + %x2 = add i32 %x1, %a2 + %a3 = load volatile i32, i32* @a + %x3 = add i32 %x2, %a3 + %a4 = load volatile i32, i32* @a + %x4 = add i32 %x3, %a4 + %a5 = load volatile i32, i32* @a + %x5 = add i32 %x4, %a5 + %a6 = load volatile i32, i32* @a + %x6 = add i32 %x5, %a6 + %a7 = load volatile i32, i32* @a + %x7 = add i32 %x6, %a6 + %a8 = load volatile i32, i32* @a + %x8 = add 
i32 %x7, %a8 + %a9 = load volatile i32, i32* @a + %x9 = add i32 %x8, %a9 + %a10 = load volatile i32, i32* @a + %x10 = add i32 %x9, %a10 + %a11 = load volatile i32, i32* @a + %x11 = add i32 %x10, %a11 + %a12 = load volatile i32, i32* @a + %x12 = add i32 %x11, %a12 + %add = add i32 %x12, %a + ret i32 %add +} + +; Function Attrs: nounwind readnone uwtable +define i32 @bar(i32 %a) #0 { +entry: + %0 = tail call i32 @simpleFunction(i32 6) + ret i32 %0 +} + +declare void @extern() + +attributes #0 = { nounwind readnone uwtable } +attributes #1 = { nounwind cold readnone uwtable } + +; DEFAULT: Inlining (cost=120, threshold=337) +; DEFAULT-SAME: simpleFunction +; FORCE-INLINE: Inlining (cost=always): Force inlined by auto-tuning +; FORCE-INLINE-SAME: simpleFunction +; NO-INLINE: NOT Inlining (cost=never): Force non-inlined by auto-tuning +; NO-INLINE-SAME: simpleFunction diff --git a/llvm/test/AutoTuning/Inline/inline-attribute.ll b/llvm/test/AutoTuning/Inline/inline-attribute.ll new file mode 100644 index 0000000000000000000000000000000000000000..50f583d0a51e850477454365fe3b6e56415ee5f7 --- /dev/null +++ b/llvm/test/AutoTuning/Inline/inline-attribute.ll @@ -0,0 +1,85 @@ +; RUN: rm %t.inline_opp -rf +; RUN: opt %s -S -passes='cgscc(inline)' -auto-tuning-opp=%t.inline_opp -auto-tuning-type-filter=CallSite --disable-output +; RUN: FileCheck %s --input-file %t.inline_opp/inline-attribute.ll.yaml -check-prefix=TEST-1 +; RUN: FileCheck %s --input-file %t.inline_opp/inline-attribute.ll.yaml -check-prefix=TEST-2 + +; ModuleID = 'inline.c' +source_filename = "inline.c" +target datalayout = "e-m:e-i8:8:32-i16:16:32-i64:64-i128:128-n32:64-S128" +target triple = "aarch64-unknown-linux-gnu" + +; Function Attrs: noinline norecurse nounwind readnone uwtable willreturn +define dso_local i32 @mul(i32 %a) local_unnamed_addr #0 !dbg !10 { +entry: + %mul = mul nsw i32 %a, %a, !dbg !12 + ret i32 %mul, !dbg !13 +} + +; Function Attrs: alwaysinline nounwind uwtable +define dso_local i32 
@add(i32 %a) local_unnamed_addr #1 !dbg !14 { +entry: + %add = shl nsw i32 %a, 1, !dbg !15 + ret i32 %add, !dbg !16 +} + +; Function Attrs: nounwind uwtable +define dso_local i32 @inc(i32 %a) local_unnamed_addr #2 !dbg !17 { +entry: + %inc = add nsw i32 %a, 1, !dbg !18 + ret i32 %inc, !dbg !19 +} + +; Function Attrs: nounwind uwtable +define dso_local i32 @func(i32 %a) local_unnamed_addr #2 !dbg !20 { +entry: + %call = call i32 @add(i32 %a), !dbg !21 + %call1 = call i32 @mul(i32 %a), !dbg !22 + %add = add nsw i32 %call, %call1, !dbg !23 + %call2 = call i32 @inc(i32 %a), !dbg !24 + %add3 = add nsw i32 %add, %call2, !dbg !25 + ret i32 %add3, !dbg !26 +} + +attributes #0 = { noinline norecurse nounwind readnone uwtable willreturn "disable-tail-calls"="false" "frame-pointer"="non-leaf" "less-precise-fpmad"="false" "min-legal-vector-width"="0" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="generic" "target-features"="+neon" "unsafe-fp-math"="false" "use-soft-float"="false" } +attributes #1 = { alwaysinline nounwind uwtable "disable-tail-calls"="false" "frame-pointer"="non-leaf" "less-precise-fpmad"="false" "min-legal-vector-width"="0" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="generic" "target-features"="+neon" "unsafe-fp-math"="false" "use-soft-float"="false" } +attributes #2 = { nounwind uwtable "disable-tail-calls"="false" "frame-pointer"="non-leaf" "less-precise-fpmad"="false" "min-legal-vector-width"="0" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="generic" "target-features"="+neon" "unsafe-fp-math"="false" "use-soft-float"="false" } + +!llvm.dbg.cu = 
!{!0} +!llvm.module.flags = !{!3, !4, !5, !6, !7, !8} +!llvm.ident = !{!9} + +!0 = distinct !DICompileUnit(language: DW_LANG_C99, file: !1, producer: "Huawei Bisheng Compiler clang version 12.0.0 (729941c4adfa)", isOptimized: true, runtimeVersion: 0, emissionKind: LineTablesOnly, enums: !2, splitDebugInlining: false, nameTableKind: None) +!1 = !DIFile(filename: "test.c", directory: "/home/m00629332/code/autoTuner/ir-hashing") +!2 = !{} +!3 = !{i32 2, !"Debug Info Version", i32 3} +!4 = !{i32 1, !"wchar_size", i32 4} +!5 = !{i32 1, !"branch-target-enforcement", i32 0} +!6 = !{i32 1, !"sign-return-address", i32 0} +!7 = !{i32 1, !"sign-return-address-all", i32 0} +!8 = !{i32 1, !"sign-return-address-with-bkey", i32 0} +!9 = !{!"Huawei Bisheng Compiler clang version 12.0.0 (729941c4adfa)"} +!10 = distinct !DISubprogram(name: "mul", scope: !1, file: !1, line: 2, type: !11, scopeLine: 2, flags: DIFlagPrototyped, spFlags: DISPFlagDefinition | DISPFlagOptimized, unit: !0, retainedNodes: !2) +!11 = !DISubroutineType(types: !2) +!12 = !DILocation(line: 3, column: 13, scope: !10) +!13 = !DILocation(line: 3, column: 5, scope: !10) +!14 = distinct !DISubprogram(name: "add", scope: !1, file: !1, line: 7, type: !11, scopeLine: 7, flags: DIFlagPrototyped, spFlags: DISPFlagDefinition | DISPFlagOptimized, unit: !0, retainedNodes: !2) +!15 = !DILocation(line: 8, column: 13, scope: !14) +!16 = !DILocation(line: 8, column: 5, scope: !14) +!17 = distinct !DISubprogram(name: "inc", scope: !1, file: !1, line: 11, type: !11, scopeLine: 11, flags: DIFlagPrototyped, spFlags: DISPFlagDefinition | DISPFlagOptimized, unit: !0, retainedNodes: !2) +!18 = !DILocation(line: 12, column: 12, scope: !17) +!19 = !DILocation(line: 12, column: 5, scope: !17) +!20 = distinct !DISubprogram(name: "func", scope: !1, file: !1, line: 15, type: !11, scopeLine: 15, flags: DIFlagPrototyped, spFlags: DISPFlagDefinition | DISPFlagOptimized, unit: !0, retainedNodes: !2) +!21 = !DILocation(line: 16, column: 12, 
scope: !20) +!22 = !DILocation(line: 16, column: 19, scope: !20) +!23 = !DILocation(line: 16, column: 18, scope: !20) +!24 = !DILocation(line: 16, column: 26, scope: !20) +!25 = !DILocation(line: 16, column: 25, scope: !20) +!26 = !DILocation(line: 16, column: 5, scope: !20) + +; TEST-1: Pass: inline +; TEST-1-NOT: Pass: inline + +; TEST-2: Name: inc +; TEST-2-NEXT: DebugLoc: { File: test.c, Line: 16, Column: 26 } +; TEST-2-NEXT: Function: func +; TEST-2-NEXT: CodeRegionType: callsite diff --git a/llvm/test/AutoTuning/Inline/opp.ll b/llvm/test/AutoTuning/Inline/opp.ll new file mode 100644 index 0000000000000000000000000000000000000000..dfe1dac2947617026d6db0fa93d8c7690d64d22f --- /dev/null +++ b/llvm/test/AutoTuning/Inline/opp.ll @@ -0,0 +1,64 @@ +; RUN: rm %t.callsite_opp -rf +; RUN: sed 's#\[number\]#25#g; s#\[func_name\]#ColdFunction#g' %S/Inputs/template.yaml > %t.template25.yaml +; RUN: opt %s -passes=inline -S -auto-tuning-opp=%t.callsite_opp -auto-tuning-type-filter=CallSite + +; RUN: FileCheck %s --input-file %t.callsite_opp/opp.ll.yaml -check-prefix=CALLSITE + +@a = global i32 4 + +declare void @extern() +; Function Attrs: nounwind readnone uwtable +define i32 @simpleFunction(i32 %a) #1 { +entry: + call void @extern() + %a1 = load volatile i32, i32* @a + %x1 = add i32 %a1, %a1 + %a2 = load volatile i32, i32* @a + %x2 = add i32 %x1, %a2 + %a3 = load volatile i32, i32* @a + %x3 = add i32 %x2, %a3 + %a4 = load volatile i32, i32* @a + %x4 = add i32 %x3, %a4 + %a5 = load volatile i32, i32* @a + %x5 = add i32 %x4, %a5 + %a6 = load volatile i32, i32* @a + %x6 = add i32 %x5, %a6 + %a7 = load volatile i32, i32* @a + %x7 = add i32 %x6, %a6 + %a8 = load volatile i32, i32* @a + %x8 = add i32 %x7, %a8 + %a9 = load volatile i32, i32* @a + %x9 = add i32 %x8, %a9 + %a10 = load volatile i32, i32* @a + %x10 = add i32 %x9, %a10 + %a11 = load volatile i32, i32* @a + %x11 = add i32 %x10, %a11 + %a12 = load volatile i32, i32* @a + %x12 = add i32 %x11, %a12 + %add = add i32 
%x12, %a + ret i32 %add +} + +define i32 @bar(i32 %a) #0 { +entry: + %0 = tail call i32 @simpleFunction(i32 6) + ret i32 %0 +} + +attributes #0 = { nounwind readnone uwtable } +attributes #1 = { nounwind cold readnone uwtable } + +; Check if code regions are properly generated as tuning opportunities. +; CALLSITE: --- !AutoTuning +; CALLSITE-NEXT: Pass: inline +; CALLSITE-NEXT: Name: simpleFunction +; CALLSITE-NEXT: Function: bar +; CALLSITE-NEXT: CodeRegionType: callsite +; CALLSITE-NEXT: CodeRegionHash: {{[0-9]+}} +; CALLSITE-NEXT: DynamicConfigs: { ForceInline: [ 0, 1 ] } +; CALLSITE-NEXT: BaselineConfig: { ForceInline: '1' } +; CALLSITE-NEXT: Invocation: 0 +; CALLSITE-NEXT: ... + +; Check if external functions are filtered out. +; CALLSITE-NOT: Name: extern diff --git a/llvm/test/AutoTuning/LoopUnroll/Inputs/debug_loc_template.yaml b/llvm/test/AutoTuning/LoopUnroll/Inputs/debug_loc_template.yaml new file mode 100644 index 0000000000000000000000000000000000000000..6dc49a1f7dc20df8d98a838ad04a9a8b619fb5c0 --- /dev/null +++ b/llvm/test/AutoTuning/LoopUnroll/Inputs/debug_loc_template.yaml @@ -0,0 +1,10 @@ +--- !AutoTuning +Pass: loop-unroll +Name: for.cond +DebugLoc: { File: loop-opp.c, Line: 4, Column: 5 } +Function: foo +CodeRegionType: loop +Args: + - UnrollCount: [number] +Invocation: 0 +... diff --git a/llvm/test/AutoTuning/LoopUnroll/Inputs/loop_nest.yaml b/llvm/test/AutoTuning/LoopUnroll/Inputs/loop_nest.yaml new file mode 100644 index 0000000000000000000000000000000000000000..4920329dbd4b4fe1179e7ab9d763d3d41494ef6f --- /dev/null +++ b/llvm/test/AutoTuning/LoopUnroll/Inputs/loop_nest.yaml @@ -0,0 +1,10 @@ +# CodeRegionHash is correct for the first code region only. 
+!AutoTuning {Args: [{UnrollCount: 2}], CodeRegionHash: 8456922293277663707, CodeRegionType: loop, + DebugLoc: {Column: 8, File: loop-nest.c, Line: 10}, Function: loop_nest, Invocation: 0, + Name: for.body6.us, Pass: loop-unroll} +--- !AutoTuning {Args: [{UnrollCount: 4}], CodeRegionHash: 8456922293277663707, CodeRegionType: loop, + DebugLoc: {Column: 5, File: loop-nest.c, Line: 9}, Function: loop_nest, Invocation: 0, + Name: for.cond4.preheader.us, Pass: loop-unroll} +--- !AutoTuning {Args: [{UnrollCount: 4}], CodeRegionHash: 8456922293277663707, CodeRegionType: loop, + DebugLoc: {Column: 3, File: loop-nest.c, Line: 8}, Function: loop_nest, Invocation: 0, + Name: for.cond1.preheader, Pass: loop-unroll} diff --git a/llvm/test/AutoTuning/LoopUnroll/Inputs/loop_peel.yaml b/llvm/test/AutoTuning/LoopUnroll/Inputs/loop_peel.yaml new file mode 100644 index 0000000000000000000000000000000000000000..a90cebbce88f87734eb81e1248c661cf9263f66d --- /dev/null +++ b/llvm/test/AutoTuning/LoopUnroll/Inputs/loop_peel.yaml @@ -0,0 +1,9 @@ +--- !AutoTuning +Pass: loop-unroll +Name: loop +Function: invariant_backedge_1 +CodeRegionType: loop +Args: + - UnrollCount: [number] +Invocation: 0 +... diff --git a/llvm/test/AutoTuning/LoopUnroll/Inputs/unroll_raw_template.yaml b/llvm/test/AutoTuning/LoopUnroll/Inputs/unroll_raw_template.yaml new file mode 100644 index 0000000000000000000000000000000000000000..18681a0e2efee692ab25a001230e075ada7c1e35 --- /dev/null +++ b/llvm/test/AutoTuning/LoopUnroll/Inputs/unroll_raw_template.yaml @@ -0,0 +1,10 @@ +--- !AutoTuning +Pass: loop-unroll +Name: label %5 +Function: main +CodeRegionType: loop +CodeRegionHash: [hash] +Args: +- UnrollCount: [number] +Invocation: 1 +... 
diff --git a/llvm/test/AutoTuning/LoopUnroll/Inputs/unroll_template.yaml b/llvm/test/AutoTuning/LoopUnroll/Inputs/unroll_template.yaml new file mode 100644 index 0000000000000000000000000000000000000000..166f877a232eb1475dc4f678f20f45c764689583 --- /dev/null +++ b/llvm/test/AutoTuning/LoopUnroll/Inputs/unroll_template.yaml @@ -0,0 +1,10 @@ +--- !AutoTuning +Pass: loop-unroll +Name: [name] +Function: foo +CodeRegionType: loop +CodeRegionHash: [hash] +Args: + - UnrollCount: [number] +Invocation: 1 +... diff --git a/llvm/test/AutoTuning/LoopUnroll/Inputs/unroll_template_no_metadata.yaml b/llvm/test/AutoTuning/LoopUnroll/Inputs/unroll_template_no_metadata.yaml new file mode 100644 index 0000000000000000000000000000000000000000..b626473cf7829294205a6ff03458fa33424e6b91 --- /dev/null +++ b/llvm/test/AutoTuning/LoopUnroll/Inputs/unroll_template_no_metadata.yaml @@ -0,0 +1,8 @@ +--- !AutoTuning +Pass: loop-unroll +CodeRegionType: loop +CodeRegionHash: [hash] +Args: + - UnrollCount: [number] +Invocation: 1 +... 
diff --git a/llvm/test/AutoTuning/LoopUnroll/debug_loc.ll b/llvm/test/AutoTuning/LoopUnroll/debug_loc.ll new file mode 100644 index 0000000000000000000000000000000000000000..85dd690d01c5c5d1d509ace40501278130420308 --- /dev/null +++ b/llvm/test/AutoTuning/LoopUnroll/debug_loc.ll @@ -0,0 +1,161 @@ +; RUN: opt %s -S -passes='require<opt-remark-emit>,loop(loop-unroll-full)' | \ +; RUN: FileCheck %s -check-prefix=DISABLE + +; RUN: rm %t.unroll_debug_loc0.yaml -rf +; RUN: sed 's#\[number\]#0#g' %S/Inputs/debug_loc_template.yaml > %t.unroll_debug_loc0.yaml +; RUN: opt %s -S -passes='require<opt-remark-emit>,loop(loop-unroll-full)' \ +; RUN: -auto-tuning-input=%t.unroll_debug_loc0.yaml | \ +; RUN: FileCheck %s -check-prefix=UNROLL0 + +; RUN: rm %t.unroll_debug_loc4.yaml -rf +; RUN: sed 's#\[number\]#4#g' %S/Inputs/debug_loc_template.yaml > %t.unroll_debug_loc4.yaml +; RUN: opt %s -S -passes='require<opt-remark-emit>,loop(loop-unroll-full)' \ +; RUN: -auto-tuning-code-region-matching-hash=false \ +; RUN: -auto-tuning-input=%t.unroll_debug_loc4.yaml | \ +; RUN: FileCheck %s -check-prefix=UNROLL4 + +; RUN: rm %t.unroll4.yaml -rf +; RUN: sed 's#\[number\]#4#g; s#\[name\]#for.cond#g; s#\[hash\]#11552168367013316892#g;'\ +; RUN: %S/Inputs/unroll_template.yaml > %t.unroll4.yaml +; RUN: opt %s -S -passes='require<opt-remark-emit>,loop(loop-unroll-full)' \ +; RUN: -auto-tuning-code-region-matching-hash=false \ +; RUN: -auto-tuning-input=%t.unroll4.yaml | \ +; RUN: FileCheck %s -check-prefix=UNROLL4-MISMATCH + +; UNSUPPORTED: windows + +; ModuleID = 'loop-opp.c' +source_filename = "loop-opp.c" +target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128" +target triple = "x86_64-unknown-linux-gnu" + +; Function Attrs: noinline nounwind uwtable +define i32 @foo(i32* %n) #0 !dbg !6 { +entry: + %n.addr = alloca i32*, align 8 + %b = alloca i32, align 4 + %i = alloca i32, align 4 + store i32* %n, i32** %n.addr, align 8 + call void @llvm.dbg.declare(metadata i32** %n.addr, metadata !11, metadata !12), !dbg !13 + call void @llvm.dbg.declare(metadata i32* 
%b, metadata !14, metadata !12), !dbg !15 + store i32 0, i32* %b, align 4, !dbg !15 + call void @llvm.dbg.declare(metadata i32* %i, metadata !16, metadata !12), !dbg !18 + store i32 0, i32* %i, align 4, !dbg !18 + br label %for.cond, !dbg !19 + +for.cond: ; preds = %for.inc, %entry + %0 = load i32, i32* %i, align 4, !dbg !20 + %1 = load i32*, i32** %n.addr, align 8, !dbg !23 + %2 = load i32, i32* %1, align 4, !dbg !24 + %cmp = icmp slt i32 %0, %2, !dbg !25 + br i1 %cmp, label %for.body, label %for.end, !dbg !26 + +for.body: ; preds = %for.cond + %3 = load i32, i32* %b, align 4, !dbg !28 + %add = add nsw i32 %3, 1, !dbg !30 + store i32 %add, i32* %b, align 4, !dbg !31 + br label %for.inc, !dbg !32 + +for.inc: ; preds = %for.body + %4 = load i32, i32* %i, align 4, !dbg !33 + %inc = add nsw i32 %4, 1, !dbg !33 + store i32 %inc, i32* %i, align 4, !dbg !33 + br label %for.cond, !dbg !35, !llvm.loop !36 + +for.end: ; preds = %for.cond + %5 = load i32, i32* %b, align 4, !dbg !39 + ret i32 %5, !dbg !40 +} + +; Function Attrs: nounwind readnone +declare void @llvm.dbg.declare(metadata, metadata, metadata) #1 + +attributes #0 = { noinline nounwind uwtable "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "less-precise-fpmad"="false" "no-frame-pointer-elim"="true" "no-frame-pointer-elim-non-leaf" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+fxsr,+mmx,+sse,+sse2,+x87" "unsafe-fp-math"="false" "use-soft-float"="false" } +attributes #1 = { nounwind readnone } + +!llvm.dbg.cu = !{!0} +!llvm.module.flags = !{!3, !4} +!llvm.ident = !{!5} + +!0 = distinct !DICompileUnit(language: DW_LANG_C99, file: !1, producer: "" ,isOptimized: false, runtimeVersion: 0, emissionKind: FullDebug, enums: !2) +!1 = !DIFile(filename: "loop-opp.c", directory: "") +!2 = !{} +!3 = !{i32 2, !"Dwarf Version", i32 4} 
+!4 = !{i32 2, !"Debug Info Version", i32 3} +!5 = !{!""} +!6 = distinct !DISubprogram(name: "foo", scope: !1, file: !1, line: 1, type: !7, isLocal: false, isDefinition: true, scopeLine: 2, flags: DIFlagPrototyped, isOptimized: false, unit: !0) +!7 = !DISubroutineType(types: !8) +!8 = !{!9, !10} +!9 = !DIBasicType(name: "int", size: 32, encoding: DW_ATE_signed) +!10 = !DIDerivedType(tag: DW_TAG_pointer_type, baseType: !9, size: 64) +!11 = !DILocalVariable(name: "n", arg: 1, scope: !6, file: !1, line: 1, type: !10) +!12 = !DIExpression() +!13 = !DILocation(line: 1, column: 20, scope: !6) +!14 = !DILocalVariable(name: "b", scope: !6, file: !1, line: 3, type: !9) +!15 = !DILocation(line: 3, column: 9, scope: !6) +!16 = !DILocalVariable(name: "i", scope: !17, file: !1, line: 4, type: !9) +!17 = distinct !DILexicalBlock(scope: !6, file: !1, line: 4, column: 5) +!18 = !DILocation(line: 4, column: 14, scope: !17) +!19 = !DILocation(line: 4, column: 10, scope: !17) +!20 = !DILocation(line: 4, column: 20, scope: !21) +!21 = !DILexicalBlockFile(scope: !22, file: !1, discriminator: 1) +!22 = distinct !DILexicalBlock(scope: !17, file: !1, line: 4, column: 5) +!23 = !DILocation(line: 4, column: 25, scope: !21) +!24 = !DILocation(line: 4, column: 24, scope: !21) +!25 = !DILocation(line: 4, column: 22, scope: !21) +!26 = !DILocation(line: 4, column: 5, scope: !27) +!27 = !DILexicalBlockFile(scope: !17, file: !1, discriminator: 1) +!28 = !DILocation(line: 6, column: 11, scope: !29) +!29 = distinct !DILexicalBlock(scope: !22, file: !1, line: 5, column: 5) +!30 = !DILocation(line: 6, column: 12, scope: !29) +!31 = !DILocation(line: 6, column: 9, scope: !29) +!32 = !DILocation(line: 7, column: 5, scope: !29) +!33 = !DILocation(line: 4, column: 28, scope: !34) +!34 = !DILexicalBlockFile(scope: !22, file: !1, discriminator: 2) +!35 = !DILocation(line: 4, column: 5, scope: !34) +!36 = distinct !{!36, !37, !38} +!37 = !DILocation(line: 4, column: 5, scope: !17) +!38 = !DILocation(line: 
7, column: 5, scope: !17) +!39 = !DILocation(line: 8, column: 12, scope: !6) +!40 = !DILocation(line: 8, column: 5, scope: !6) + +; Auto-tuning-enabled loop unrolling - check that the loop is not unrolled when the auto-tuning feature is disabled when +; the input remark contains DebugLoc info. +; +; DISABLE-LABEL: @foo( +; DISABLE: for.cond +; DISABLE: for.body +; DISABLE-NOT: for.body.1 +; DISABLE: for.inc +; DISABLE-NOT: llvm.loop.unroll.disable + +; Auto-tuning-enabled loop unrolling - check that the loop is not unrolled +; when unroll count explicitly set to be 0. +; +; UNROLL0-LABEL: @foo( +; UNROLL0: for.cond +; UNROLL0: for.body +; UNROLL0-NOT: for.body.1 +; UNROLL0: for.inc +; UNROLL0-NOT: llvm.loop.unroll.disable + +; Auto-tuning-enabled loop unrolling - check that we can unroll the loop by 4 +; when explicitly requested. +; +; UNROLL4-LABEL: @foo( +; UNROLL4: for.cond +; UNROLL4: for.body +; UNROLL4: for.body.1 +; UNROLL4: for.body.2 +; UNROLL4: for.body.3 +; UNROLL4: llvm.loop.unroll.disable + +; Auto-tuning-enabled loop unrolling - check that the loop is not unrolled +; when DebugLoc is missing in the input remark. 
+; +; UNROLL4-MISMATCH-LABEL: @foo( +; UNROLL4-MISMATCH: for.cond +; UNROLL4-MISMATCH: for.body +; UNROLL4-MISMATCH-NOT: for.body.1 +; UNROLL4-MISMATCH: for.inc +; UNROLL4-MISMATCH-NOT: llvm.loop.unroll.disable diff --git a/llvm/test/AutoTuning/LoopUnroll/dynamic_config.ll b/llvm/test/AutoTuning/LoopUnroll/dynamic_config.ll new file mode 100644 index 0000000000000000000000000000000000000000..414c6ff2d1b0e5d637468c23edd0f8bb6c25ad5b --- /dev/null +++ b/llvm/test/AutoTuning/LoopUnroll/dynamic_config.ll @@ -0,0 +1,56 @@ +; RUN: rm %t.default_opp -rf +; RUN: opt %s -S -auto-tuning-opp=%t.default_opp -auto-tuning-type-filter=Loop \ +; RUN: -passes='require<opt-remark-emit>,loop(loop-unroll-full)' --disable-output +; RUN: FileCheck %s --input-file %t.default_opp/dynamic_config.ll.yaml + +; Function Attrs: nofree norecurse nounwind uwtable +define dso_local void @transform(i64* nocapture %W) local_unnamed_addr{ +entry: + br label %for.body + +for.body: ; preds = %entry, %for.body + %i.037 = phi i32 [ 16, %entry ], [ %inc, %for.body ] + %sub = add nsw i32 %i.037, -3 + %idxprom = sext i32 %sub to i64 + %arrayidx = getelementptr inbounds i64, i64* %W, i64 %idxprom + %0 = load i64, i64* %arrayidx, align 8 + %sub1 = add nsw i32 %i.037, -6 + %idxprom2 = sext i32 %sub1 to i64 + %arrayidx3 = getelementptr inbounds i64, i64* %W, i64 %idxprom2 + %1 = load i64, i64* %arrayidx3, align 8 + %xor = xor i64 %1, %0 + %idxprom4 = zext i32 %i.037 to i64 + %arrayidx5 = getelementptr inbounds i64, i64* %W, i64 %idxprom4 + store i64 %xor, i64* %arrayidx5, align 8 + %inc = add nuw nsw i32 %i.037, 1 + %cmp = icmp ult i32 %i.037, 79 + br i1 %cmp, label %for.body, label %for.body8.preheader + +for.body8.preheader: ; preds = %for.body + br label %for.body8 + +for.body8: ; preds = %for.body8.preheader, %for.body8 + %indvars.iv = phi i64 [ 80, %for.body8.preheader ], [ %indvars.iv.next, %for.body8 ] + %2 = add nsw i64 %indvars.iv, -4 + %arrayidx11 = getelementptr inbounds i64, i64* %W, i64 %2 + %3 = load i64, i64* 
%arrayidx11, align 8 + %4 = add nsw i64 %indvars.iv, -5 + %arrayidx14 = getelementptr inbounds i64, i64* %W, i64 %4 + %5 = load i64, i64* %arrayidx14, align 8 + %xor15 = xor i64 %5, %3 + %arrayidx17 = getelementptr inbounds i64, i64* %W, i64 %indvars.iv + store i64 %xor15, i64* %arrayidx17, align 8 + %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1 + %exitcond = icmp ne i64 %indvars.iv.next, 256 + br i1 %exitcond, label %for.body8, label %for.end20 + +for.end20: ; preds = %for.body8 + ret void +} + +; CHECK: --- !AutoTuning +; CHECK: DynamicConfigs: { UnrollCount: [ 0, 1, 64, 16, 32 ] +; CHECK: ... +; CHECK-NEXT: --- !AutoTuning +; CHECK: DynamicConfigs: { UnrollCount: [ 0, 1, 64, 16, 32 ] +; CHECK: ... diff --git a/llvm/test/AutoTuning/LoopUnroll/loop_nest.ll b/llvm/test/AutoTuning/LoopUnroll/loop_nest.ll new file mode 100644 index 0000000000000000000000000000000000000000..7f3e27ca057aace9a03d29260339777d7403d600 --- /dev/null +++ b/llvm/test/AutoTuning/LoopUnroll/loop_nest.ll @@ -0,0 +1,136 @@ +; REQUIRES: asserts +; CodeRegionHash matches for the first code region only. AutoTuner will find +; a match for one code region when hash matching is enabled. AutoTuner will find +; a match for all three code regions when hash matching is disabled. 
+ +; RUN: rm -rf %t.loop_nest.txt +; RUN: opt %s -passes='require<opt-remark-emit>,loop(loop-unroll-full)' \ +; RUN: -debug-only=autotuning -auto-tuning-input=%S/Inputs/loop_nest.yaml \ +; RUN: --disable-output &> %t.loop_nest.txt +; RUN: grep 'UnrollCount is set' %t.loop_nest.txt | wc -l | \ +; RUN: FileCheck %s -check-prefix=HASH_MATCHING_ENABLED + +; RUN: rm -rf %t.loop_nest.txt +; RUN: opt %s -passes='require<opt-remark-emit>,loop(loop-unroll-full)' \ +; RUN: -auto-tuning-input=%S/Inputs/loop_nest.yaml -debug-only=autotuning \ +; RUN: -auto-tuning-code-region-matching-hash=false --disable-output &> %t.loop_nest.txt +; RUN: grep 'UnrollCount is set' %t.loop_nest.txt | wc -l | \ +; RUN: FileCheck %s -check-prefix=HASH_MATCHING_DISABLED + +; ModuleID = 'loop-nest.c' +source_filename = "loop-nest.c" +target datalayout = "e-m:e-i8:8:32-i16:16:32-i64:64-i128:128-n32:64-S128" +target triple = "aarch64-unknown-linux-gnu" + +; Function Attrs: nofree norecurse nounwind uwtable +define dso_local void @loop_nest(i32 %ni, i32 %nj, i32 %nk, i32 %alpha, i32 %beta, i32** nocapture readonly %A, i32** nocapture readonly %B, i32** nocapture readonly %C) local_unnamed_addr #0 !dbg !10 { +entry: + %cmp41 = icmp sgt i32 %ni, 0, !dbg !12 + br i1 %cmp41, label %for.cond1.preheader.lr.ph, label %for.end23, !dbg !13 + +for.cond1.preheader.lr.ph: ; preds = %entry + %cmp238 = icmp slt i32 %nk, 1 + %cmp536 = icmp slt i32 %nj, 1 + %wide.trip.count51 = zext i32 %ni to i64, !dbg !12 + %wide.trip.count47 = zext i32 %nk to i64 + %wide.trip.count = zext i32 %nj to i64 + %brmerge = or i1 %cmp238, %cmp536 + br label %for.cond1.preheader, !dbg !13 + +for.cond1.preheader: ; preds = %for.cond1.preheader.lr.ph, %for.inc21 + %indvars.iv49 = phi i64 [ 0, %for.cond1.preheader.lr.ph ], [ %indvars.iv.next50, %for.inc21 ] + br i1 %brmerge, label %for.inc21, label %for.cond4.preheader.us.preheader, !dbg !14 + +for.cond4.preheader.us.preheader: ; preds = %for.cond1.preheader + %arrayidx15 = getelementptr inbounds i32*, i32** %C, i64 
%indvars.iv49 + %arrayidx = getelementptr inbounds i32*, i32** %A, i64 %indvars.iv49 + %.pre = load i32*, i32** %arrayidx, align 8, !tbaa !15 + %.pre53 = load i32*, i32** %arrayidx15, align 8, !tbaa !15 + br label %for.cond4.preheader.us, !dbg !14 + +for.cond4.preheader.us: ; preds = %for.cond4.preheader.us.preheader, %for.cond4.for.inc18_crit_edge.us + %indvars.iv45 = phi i64 [ 0, %for.cond4.preheader.us.preheader ], [ %indvars.iv.next46, %for.cond4.for.inc18_crit_edge.us ] + %arrayidx8.us = getelementptr inbounds i32, i32* %.pre, i64 %indvars.iv45 + %arrayidx10.us = getelementptr inbounds i32*, i32** %B, i64 %indvars.iv45 + %0 = load i32*, i32** %arrayidx10.us, align 8, !tbaa !15 + br label %for.body6.us, !dbg !19 + +for.body6.us: ; preds = %for.cond4.preheader.us, %for.body6.us + %indvars.iv = phi i64 [ 0, %for.cond4.preheader.us ], [ %indvars.iv.next, %for.body6.us ] + %1 = load i32, i32* %arrayidx8.us, align 4, !dbg !20, !tbaa !21 + %mul.us = mul nsw i32 %1, %alpha, !dbg !23 + %arrayidx12.us = getelementptr inbounds i32, i32* %0, i64 %indvars.iv, !dbg !24 + %2 = load i32, i32* %arrayidx12.us, align 4, !dbg !24, !tbaa !21 + %mul13.us = mul nsw i32 %mul.us, %2, !dbg !25 + %arrayidx17.us = getelementptr inbounds i32, i32* %.pre53, i64 %indvars.iv, !dbg !26 + %3 = load i32, i32* %arrayidx17.us, align 4, !dbg !27, !tbaa !21 + %add.us = add nsw i32 %3, %mul13.us, !dbg !27 + store i32 %add.us, i32* %arrayidx17.us, align 4, !dbg !27, !tbaa !21 + %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1, !dbg !28 + %exitcond.not = icmp eq i64 %indvars.iv.next, %wide.trip.count, !dbg !29 + br i1 %exitcond.not, label %for.cond4.for.inc18_crit_edge.us, label %for.body6.us, !dbg !19, !llvm.loop !30 + +for.cond4.for.inc18_crit_edge.us: ; preds = %for.body6.us + %indvars.iv.next46 = add nuw nsw i64 %indvars.iv45, 1, !dbg !33 + %exitcond48.not = icmp eq i64 %indvars.iv.next46, %wide.trip.count47, !dbg !34 + br i1 %exitcond48.not, label %for.inc21, label %for.cond4.preheader.us, !dbg 
!14, !llvm.loop !35 + +for.inc21: ; preds = %for.cond4.for.inc18_crit_edge.us, %for.cond1.preheader + %indvars.iv.next50 = add nuw nsw i64 %indvars.iv49, 1, !dbg !37 + %exitcond52.not = icmp eq i64 %indvars.iv.next50, %wide.trip.count51, !dbg !12 + br i1 %exitcond52.not, label %for.end23, label %for.cond1.preheader, !dbg !13, !llvm.loop !38 + +for.end23: ; preds = %for.inc21, %entry + ret void, !dbg !40 +} + +attributes #0 = { nofree norecurse nounwind uwtable "disable-tail-calls"="false" "frame-pointer"="non-leaf" "less-precise-fpmad"="false" "min-legal-vector-width"="0" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="generic" "target-features"="+neon" "unsafe-fp-math"="false" "use-soft-float"="false" } + +!llvm.dbg.cu = !{!0} +!llvm.module.flags = !{!3, !4, !5, !6, !7, !8} +!llvm.ident = !{!9} + +!0 = distinct !DICompileUnit(language: DW_LANG_C99, file: !1, producer: "Huawei BiSheng Compiler clang version 12.0.0 (clang-a279e099a09a flang-9a86b70390a7)", isOptimized: true, runtimeVersion: 0, emissionKind: LineTablesOnly, enums: !2, splitDebugInlining: false, nameTableKind: None) +!1 = !DIFile(filename: "loop-nest.c", directory: "/home/m00629332/code/autoTuner") +!2 = !{} +!3 = !{i32 2, !"Debug Info Version", i32 3} +!4 = !{i32 1, !"wchar_size", i32 4} +!5 = !{i32 1, !"branch-target-enforcement", i32 0} +!6 = !{i32 1, !"sign-return-address", i32 0} +!7 = !{i32 1, !"sign-return-address-all", i32 0} +!8 = !{i32 1, !"sign-return-address-with-bkey", i32 0} +!9 = !{!"Huawei BiSheng Compiler clang version 12.0.0 (clang-a279e099a09a flang-9a86b70390a7)"} +!10 = distinct !DISubprogram(name: "loop_nest", scope: !1, file: !1, line: 1, type: !11, scopeLine: 5, flags: DIFlagPrototyped, spFlags: DISPFlagDefinition | DISPFlagOptimized, unit: !0, retainedNodes: !2) +!11 = !DISubroutineType(types: !2) +!12 = !DILocation(line: 8, column: 17, 
scope: !10) +!13 = !DILocation(line: 8, column: 3, scope: !10) +!14 = !DILocation(line: 9, column: 5, scope: !10) +!15 = !{!16, !16, i64 0} +!16 = !{!"any pointer", !17, i64 0} +!17 = !{!"omnipotent char", !18, i64 0} +!18 = !{!"Simple C/C++ TBAA"} +!19 = !DILocation(line: 10, column: 8, scope: !10) +!20 = !DILocation(line: 11, column: 23, scope: !10) +!21 = !{!22, !22, i64 0} +!22 = !{!"int", !17, i64 0} +!23 = !DILocation(line: 11, column: 21, scope: !10) +!24 = !DILocation(line: 11, column: 33, scope: !10) +!25 = !DILocation(line: 11, column: 31, scope: !10) +!26 = !DILocation(line: 11, column: 4, scope: !10) +!27 = !DILocation(line: 11, column: 12, scope: !10) +!28 = !DILocation(line: 10, column: 29, scope: !10) +!29 = !DILocation(line: 10, column: 22, scope: !10) +!30 = distinct !{!30, !19, !31, !32} +!31 = !DILocation(line: 11, column: 39, scope: !10) +!32 = !{!"llvm.loop.mustprogress"} +!33 = !DILocation(line: 9, column: 26, scope: !10) +!34 = !DILocation(line: 9, column: 19, scope: !10) +!35 = distinct !{!35, !14, !36, !32} +!36 = !DILocation(line: 12, column: 5, scope: !10) +!37 = !DILocation(line: 8, column: 24, scope: !10) +!38 = distinct !{!38, !13, !39, !32} +!39 = !DILocation(line: 13, column: 3, scope: !10) +!40 = !DILocation(line: 15, column: 1, scope: !10) + +; HASH_MATCHING_ENABLED: 1 +; HASH_MATCHING_DISABLED: 3 diff --git a/llvm/test/AutoTuning/LoopUnroll/loop_peel.ll b/llvm/test/AutoTuning/LoopUnroll/loop_peel.ll new file mode 100644 index 0000000000000000000000000000000000000000..f3839a49b20ea0b80ee28f03fe72166e6e225bd8 --- /dev/null +++ b/llvm/test/AutoTuning/LoopUnroll/loop_peel.ll @@ -0,0 +1,53 @@ +; NOTE: This file is used to test when UnrollCount = 1 and when the compiler +; sees that Loop Peeling is beneficial and possible, then we do Loop Peeling. 
+; RUN: rm %t.unroll1.yaml -rf +; RUN: sed 's#\[number\]#1#g;' %S/Inputs/loop_peel.yaml > %t.unroll1.yaml +; RUN: opt %s -S -passes='require<opt-remark-emit>,loop(loop-unroll-full)' \ +; RUN: -auto-tuning-code-region-matching-hash=false \ +; RUN: -auto-tuning-input=%t.unroll1.yaml | FileCheck %s + +; RUN: rm %t.unroll0.yaml -rf +; RUN: sed 's#\[number\]#0#g;' %S/Inputs/loop_peel.yaml > %t.unroll0.yaml +; RUN: opt %s -S -passes='require<opt-remark-emit>,loop(loop-unroll-full)' \ +; RUN: -auto-tuning-code-region-matching-hash=false \ +; RUN: -auto-tuning-input=%t.unroll0.yaml | FileCheck %s --check-prefix=DISABLE + +; RUN: opt %s -S -passes='require<opt-remark-emit>,loop(loop-unroll-full)' \ +; RUN: -auto-tuning-code-region-matching-hash=false \ +; RUN: -auto-tuning-opp=%t.unroll_opp -auto-tuning-type-filter=Loop --disable-output +; RUN: FileCheck %s --input-file %t.unroll_opp/loop_peel.ll.yaml -check-prefix=TEST-1 + +define i32 @invariant_backedge_1(i32 %a, i32 %b) { +; CHECK-LABEL: @invariant_backedge_1 +; CHECK-NOT: %plus = phi +; CHECK: loop.peel: +; CHECK: loop: +; CHECK: %i = phi +; CHECK: %sum = phi +; DISABLE-LABEL: @invariant_backedge_1 +; DISABLE-NOT: loop.peel: +entry: + br label %loop + +loop: + %i = phi i32 [ 0, %entry ], [ %inc, %loop ] + %sum = phi i32 [ 0, %entry ], [ %incsum, %loop ] + %plus = phi i32 [ %a, %entry ], [ %b, %loop ] + + %incsum = add i32 %sum, %plus + %inc = add i32 %i, 1 + %cmp = icmp slt i32 %i, 1000 + + br i1 %cmp, label %loop, label %exit + +exit: + ret i32 %sum +} + +; Check for dynamic values when UnrollCount is set to 1: +; TEST-1: Pass: loop-unroll +; TEST-1-NEXT: Name: loop +; TEST-1-NEXT: Function: invariant_backedge_1 +; TEST-1-NEXT: CodeRegionType: loop +; TEST-1-NEXT: CodeRegionHash: {{[0-9]+}} +; TEST-1-NEXT: DynamicConfigs: { UnrollCount: [ 0, 1, 2 ] } diff --git a/llvm/test/AutoTuning/LoopUnroll/unroll-pragma.ll b/llvm/test/AutoTuning/LoopUnroll/unroll-pragma.ll new file mode 100644 index 0000000000000000000000000000000000000000..843b8e28f3d841879f7c50490763e503901ae2ba
--- /dev/null +++ b/llvm/test/AutoTuning/LoopUnroll/unroll-pragma.ll @@ -0,0 +1,129 @@ +; RUN: rm %t.unroll_opp -rf +; RUN: opt %s -S -auto-tuning-opp=%t.unroll_opp -auto-tuning-type-filter=Loop \ +; RUN: -passes='require<opt-remark-emit>,loop(loop-unroll-full)' --disable-output +; RUN: FileCheck %s --input-file %t.unroll_opp/unroll-pragma.ll.yaml -check-prefix=TEST-1 +; RUN: FileCheck %s --input-file %t.unroll_opp/unroll-pragma.ll.yaml -check-prefix=TEST-2 + +; RUN: rm %t.unroll_opp -rf +; RUN: opt %s -S -auto-tuning-opp=%t.unroll_opp -auto-tuning-type-filter=Loop \ +; RUN: -passes='require<opt-remark-emit>,function(loop-unroll)' --disable-output +; RUN: FileCheck %s --input-file %t.unroll_opp/unroll-pragma.ll.yaml -check-prefix=TEST-1 +; RUN: FileCheck %s --input-file %t.unroll_opp/unroll-pragma.ll.yaml -check-prefix=TEST-2 + +; This function contains two loops. Loop for.body is defined with a pragma +; unroll_count(4) and loop for.body9 is without a pragma. AutoTuner will only +; consider for.body9 as a tuning opportunity.
+ +; ModuleID = 'loop-unroll.c' +source_filename = "loop-unroll.c" +target datalayout = "e-m:e-i8:8:32-i16:16:32-i64:64-i128:128-n32:64-S128" +target triple = "aarch64-unknown-linux-gnu" + +; Function Attrs: nofree norecurse nounwind uwtable +define dso_local void @loop(i32* noalias nocapture readonly %a, i32* noalias nocapture readonly %b, i32* noalias nocapture %c, i32* noalias nocapture %d, i32 %len) local_unnamed_addr #0 !dbg !10 { +entry: + %cmp34 = icmp slt i32 0, %len, !dbg !12 + br i1 %cmp34, label %for.body.lr.ph, label %for.cond6.preheader, !dbg !13 + +for.body.lr.ph: ; preds = %entry + br label %for.body, !dbg !13 + +for.cond.for.cond6.preheader_crit_edge: ; preds = %for.body + br label %for.cond6.preheader, !dbg !13 + +for.cond6.preheader: ; preds = %for.cond.for.cond6.preheader_crit_edge, %entry + %cmp732 = icmp slt i32 0, %len, !dbg !14 + br i1 %cmp732, label %for.body9.lr.ph, label %for.cond.cleanup8, !dbg !15 + +for.body9.lr.ph: ; preds = %for.cond6.preheader + br label %for.body9, !dbg !15 + +for.body: ; preds = %for.body.lr.ph, %for.body + %i.035 = phi i32 [ 0, %for.body.lr.ph ], [ %inc, %for.body ] + %idxprom = zext i32 %i.035 to i64, !dbg !16 + %arrayidx = getelementptr inbounds i32, i32* %a, i64 %idxprom, !dbg !16 + %0 = load i32, i32* %arrayidx, align 4, !dbg !16, !tbaa !17 + %arrayidx2 = getelementptr inbounds i32, i32* %b, i64 %idxprom, !dbg !21 + %1 = load i32, i32* %arrayidx2, align 4, !dbg !21, !tbaa !17 + %add = add nsw i32 %1, %0, !dbg !22 + %arrayidx4 = getelementptr inbounds i32, i32* %c, i64 %idxprom, !dbg !23 + store i32 %add, i32* %arrayidx4, align 4, !dbg !24, !tbaa !17 + %inc = add nuw nsw i32 %i.035, 1, !dbg !25 + %cmp = icmp slt i32 %inc, %len, !dbg !12 + br i1 %cmp, label %for.body, label %for.cond.for.cond6.preheader_crit_edge, !dbg !13, !llvm.loop !26 + +for.cond6.for.cond.cleanup8_crit_edge: ; preds = %for.body9 + br label %for.cond.cleanup8, !dbg !15 + +for.cond.cleanup8: ; preds = %for.cond6.for.cond.cleanup8_crit_edge, 
%for.cond6.preheader + ret void, !dbg !30 + +for.body9: ; preds = %for.body9.lr.ph, %for.body9 + %i5.033 = phi i32 [ 0, %for.body9.lr.ph ], [ %inc17, %for.body9 ] + %idxprom10 = zext i32 %i5.033 to i64, !dbg !31 + %arrayidx11 = getelementptr inbounds i32, i32* %a, i64 %idxprom10, !dbg !31 + %2 = load i32, i32* %arrayidx11, align 4, !dbg !31, !tbaa !17 + %arrayidx13 = getelementptr inbounds i32, i32* %b, i64 %idxprom10, !dbg !32 + %3 = load i32, i32* %arrayidx13, align 4, !dbg !32, !tbaa !17 + %mul = mul nsw i32 %3, %2, !dbg !33 + %arrayidx15 = getelementptr inbounds i32, i32* %d, i64 %idxprom10, !dbg !34 + store i32 %mul, i32* %arrayidx15, align 4, !dbg !35, !tbaa !17 + %inc17 = add nuw nsw i32 %i5.033, 1, !dbg !36 + %cmp7 = icmp slt i32 %inc17, %len, !dbg !14 + br i1 %cmp7, label %for.body9, label %for.cond6.for.cond.cleanup8_crit_edge, !dbg !15, !llvm.loop !37 +} + +attributes #0 = { nofree norecurse nounwind uwtable "disable-tail-calls"="false" "frame-pointer"="non-leaf" "less-precise-fpmad"="false" "min-legal-vector-width"="0" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="generic" "target-features"="+neon" "unsafe-fp-math"="false" "use-soft-float"="false" } + +!llvm.dbg.cu = !{!0} +!llvm.module.flags = !{!3, !4, !5, !6, !7, !8} +!llvm.ident = !{!9} + +!0 = distinct !DICompileUnit(language: DW_LANG_C99, file: !1, producer: "Huawei Bisheng Compiler clang version 12.0.0 (0261bbf0b2fd)", isOptimized: true, runtimeVersion: 0, emissionKind: LineTablesOnly, enums: !2, splitDebugInlining: false, nameTableKind: None) +!1 = !DIFile(filename: "loop-unroll.c", directory: "/home/AutoTuner/") +!2 = !{} +!3 = !{i32 2, !"Debug Info Version", i32 3} +!4 = !{i32 1, !"wchar_size", i32 4} +!5 = !{i32 1, !"branch-target-enforcement", i32 0} +!6 = !{i32 1, !"sign-return-address", i32 0} +!7 = !{i32 1, !"sign-return-address-all", i32 0} +!8 = 
!{i32 1, !"sign-return-address-with-bkey", i32 0} +!9 = !{!"Huawei Bisheng Compiler clang version 12.0.0 (0261bbf0b2fd)"} +!10 = distinct !DISubprogram(name: "a", scope: !1, file: !1, line: 1, type: !11, scopeLine: 1, flags: DIFlagPrototyped, spFlags: DISPFlagDefinition | DISPFlagOptimized, unit: !0, retainedNodes: !2) +!11 = !DISubroutineType(types: !2) +!12 = !DILocation(line: 3, column: 20, scope: !10) +!13 = !DILocation(line: 3, column: 5, scope: !10) +!14 = !DILocation(line: 7, column: 20, scope: !10) +!15 = !DILocation(line: 7, column: 5, scope: !10) +!16 = !DILocation(line: 4, column: 16, scope: !10) +!17 = !{!18, !18, i64 0} +!18 = !{!"int", !19, i64 0} +!19 = !{!"omnipotent char", !20, i64 0} +!20 = !{!"Simple C/C++ TBAA"} +!21 = !DILocation(line: 4, column: 23, scope: !10) +!22 = !DILocation(line: 4, column: 21, scope: !10) +!23 = !DILocation(line: 4, column: 9, scope: !10) +!24 = !DILocation(line: 4, column: 14, scope: !10) +!25 = !DILocation(line: 3, column: 28, scope: !10) +!26 = distinct !{!26, !13, !27, !28, !29} +!27 = !DILocation(line: 5, column: 5, scope: !10) +!28 = !{!"llvm.loop.mustprogress"} +!29 = !{!"llvm.loop.unroll.count", i32 4} +!30 = !DILocation(line: 10, column: 1, scope: !10) +!31 = !DILocation(line: 8, column: 16, scope: !10) +!32 = !DILocation(line: 8, column: 23, scope: !10) +!33 = !DILocation(line: 8, column: 21, scope: !10) +!34 = !DILocation(line: 8, column: 9, scope: !10) +!35 = !DILocation(line: 8, column: 14, scope: !10) +!36 = !DILocation(line: 7, column: 28, scope: !10) +!37 = distinct !{!37, !15, !38, !28} +!38 = !DILocation(line: 9, column: 5, scope: !10) + + +; TEST-1: Pass: loop-unroll +; TEST-1-NOT: Pass: loop-unroll + +; TEST-2: Name: for.body9 +; TEST-2-NEXT: DebugLoc: { File: loop-unroll.c, Line: 7, Column: 5 } +; TEST-2-NEXT: Function: loop +; TEST-2-NEXT: CodeRegionType: loop diff --git a/llvm/test/AutoTuning/LoopUnroll/unroll.ll b/llvm/test/AutoTuning/LoopUnroll/unroll.ll new file mode 100644 index 
0000000000000000000000000000000000000000..ba5c89fffaff922007e3126b8f22eac4aa096937 --- /dev/null +++ b/llvm/test/AutoTuning/LoopUnroll/unroll.ll @@ -0,0 +1,101 @@ +; RUN: opt %s -S -passes=loop-unroll | FileCheck %s -check-prefix=DISABLE + +; RUN: rm %t.unroll0.yaml -rf +; RUN: sed 's#\[number\]#0#g; s#\[name\]#for.body#g; s#\[hash\]#14791762861362113823#g' \ +; RUN: %S/Inputs/unroll_template.yaml > %t.unroll0.yaml +; RUN: opt %s -S -passes=loop-unroll -auto-tuning-input=%t.unroll0.yaml \ +; RUN: -auto-tuning-code-region-matching-hash=false | \ +; RUN: FileCheck %s -check-prefix=UNROLL0 + +; RUN: rm %t.unroll0.yaml -rf +; RUN: sed 's#\[number\]#0#g; s#\[hash\]#14791762861362113823#g' \ +; RUN: %S/Inputs/unroll_template_no_metadata.yaml > %t.unroll0.yaml +; RUN: opt %s -S -passes=loop-unroll -auto-tuning-input=%t.unroll0.yaml \ +; RUN: -auto-tuning-omit-metadata | \ +; RUN: FileCheck %s -check-prefix=UNROLL0 + +; RUN: rm %t.result1 %t.unroll1.yaml -rf +; RUN: sed 's#\[number\]#1#g; s#\[name\]#for.body#g; s#\[hash\]#14791762861362113823#g' \ +; RUN: %S/Inputs/unroll_template.yaml > %t.unroll1.yaml +; RUN: opt %s -S -passes=loop-unroll -auto-tuning-input=%t.unroll1.yaml | \ +; RUN: FileCheck %s -check-prefix=UNROLL1 + +; RUN: rm %t.result1 %t.unroll1.yaml -rf +; RUN: sed 's#\[number\]#1#g; s#\[hash\]#14791762861362113823#g' \ +; RUN: %S/Inputs/unroll_template_no_metadata.yaml > %t.unroll1.yaml +; RUN: opt %s -S -passes=loop-unroll -auto-tuning-input=%t.unroll1.yaml \ +; RUN: -auto-tuning-omit-metadata | \ +; RUN: FileCheck %s -check-prefix=UNROLL1 + +; RUN: rm %t.result4 %t.unroll4.yaml -rf +; RUN: sed 's#\[number\]#4#g; s#\[name\]#for.body#g; s#\[hash\]#14791762861362113823#g' \ +; RUN: %S/Inputs/unroll_template.yaml > %t.unroll4.yaml +; RUN: opt %s -S -passes=loop-unroll -auto-tuning-input=%t.unroll4.yaml | \ +; RUN: FileCheck %s -check-prefix=UNROLL4 + +; RUN: rm %t.result4 %t.unroll4.yaml -rf +; RUN: sed 's#\[number\]#4#g; s#\[hash\]#14791762861362113823#g' \ +; 
RUN: %S/Inputs/unroll_template_no_metadata.yaml > %t.unroll4.yaml +; RUN: opt %s -S -passes=loop-unroll -auto-tuning-input=%t.unroll4.yaml \ +; RUN: -auto-tuning-omit-metadata | \ +; RUN: FileCheck %s -check-prefix=UNROLL4 + +; UNSUPPORTED: windows + +define void @foo(i32* nocapture %a) { +entry: + br label %for.body + +for.body: ; preds = %for.body, %entry + %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ] + %arrayidx = getelementptr inbounds i32, i32* %a, i64 %indvars.iv + %0 = load i32, i32* %arrayidx, align 4 + %inc = add nsw i32 %0, 1 + store i32 %inc, i32* %arrayidx, align 4 + %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1 + %exitcond = icmp eq i64 %indvars.iv.next, 64 + br i1 %exitcond, label %for.end, label %for.body + +for.end: ; preds = %for.body + ret void +} + +; Auto-tuning-enabled loop unrolling - check that the loop is not unrolled when the auto-tuning feature is disabled +; +; DISABLE-LABEL: @foo( +; DISABLE: store i32 +; DISABLE-NOT: store i32 +; DISABLE: br i1 +; DISABLE-NOT: llvm.loop.unroll.disable + + +; Auto-tuning-enabled loop unrolling - check that the loop is not unrolled +; when unroll count explicitly set to be 0. +; +; UNROLL0-LABEL: @foo( +; UNROLL0: store i32 +; UNROLL0-NOT: store i32 +; UNROLL0: br i1 +; UNROLL0-NOT: llvm.loop.unroll.disable + + +; Auto-tuning-enabled loop unrolling - Requesting UnrollCount = 1 will perform +; Loop Peeling, and if Loop Peeling isn't possible/beneficial then Unroll Count +; is unchanged. +; +; UNROLL1-LABEL: @foo( +; UNROLL1: store i32 +; UNROLL1-NOT: store i32 +; UNROLL1: br i1 +; UNROLL1: llvm.loop.unroll.disable + +; Auto-tuning-enabled loop unrolling - check that we can unroll the loop by 4 +; when explicitly requested. 
+; +; UNROLL4-LABEL: @foo( +; UNROLL4: store i32 +; UNROLL4: store i32 +; UNROLL4: store i32 +; UNROLL4: store i32 +; UNROLL4: br i1 +; UNROLL4: llvm.loop.unroll.disable diff --git a/llvm/test/AutoTuning/LoopUnroll/unroll_raw.ll b/llvm/test/AutoTuning/LoopUnroll/unroll_raw.ll new file mode 100644 index 0000000000000000000000000000000000000000..480ccad640ae13f3daa83ccf706bac237bf38f0e --- /dev/null +++ b/llvm/test/AutoTuning/LoopUnroll/unroll_raw.ll @@ -0,0 +1,113 @@ +; Test loop unrolling using auto-tuning YAML api with IRs generated when ASSERTION=OFF +; The IRs generated when ASSERTION=OFF usually only use slot numbers as variable names. + +; RUN: opt %s -S -passes='require<opt-remark-emit>,loop(loop-unroll-full)' | \ +; RUN: FileCheck %s -check-prefix=DISABLE + +; RUN: rm %t.result1_raw %t.unroll1_raw.yaml -rf +; RUN: sed 's#\[number\]#1#g; s#\[hash\]#18159364858606519094#g' \ +; RUN: %S/Inputs/unroll_raw_template.yaml > %t.unroll1_raw.yaml +; RUN: opt %s -S -passes='require<opt-remark-emit>,function(loop-unroll)' \ +; RUN: -auto-tuning-input=%t.unroll1_raw.yaml | FileCheck %s -check-prefix=UNROLL1 + +; RUN: rm %t.result2_raw %t.unroll2_raw.yaml -rf +; RUN: sed 's#\[number\]#2#g; s#\[hash\]#18159364858606519094#g' \ +; RUN: %S/Inputs/unroll_raw_template.yaml > %t.unroll2_raw.yaml +; RUN: opt %s -S -passes='require<opt-remark-emit>,function(loop-unroll)' \ +; RUN: -auto-tuning-input=%t.unroll2_raw.yaml | FileCheck %s -check-prefix=UNROLL2 + +; RUN: rm %t.result4_raw %t.unroll4_raw.yaml -rf +; RUN: sed 's#\[number\]#4#g; s#\[hash\]#18159364858606519094#g' \ +; RUN: %S/Inputs/unroll_raw_template.yaml > %t.unroll4_raw.yaml +; RUN: opt %s -S -passes='require<opt-remark-emit>,function(loop-unroll)' \ +; RUN: -auto-tuning-input=%t.unroll4_raw.yaml | FileCheck %s -check-prefix=UNROLL4 + +; UNSUPPORTED: windows + +; ModuleID = 't.ll' +source_filename = "t.ll" + +@.str = private unnamed_addr constant [4 x i8] c"%d\0A\00", align 1 + +define void @test(i32*) { + %2 = alloca i32*, align 8 + store i32* %0, i32** %2, align 8 + %3 = load i32*, 
i32** %2, align 8 + %4 = load i32, i32* %3, align 4 + %5 = add nsw i32 %4, 2 + %6 = load i32*, i32** %2, align 8 + store i32 %5, i32* %6, align 4 + ret void +} + +define i32 @main() { + %1 = alloca i32, align 4 + %2 = alloca i32, align 4 + store i32 0, i32* %1, align 4 + store i32 8, i32* %2, align 4 + %3 = load i32, i32* %2, align 4 + %4 = icmp sle i32 %3, 88 + br i1 %4, label %.lr.ph, label %13 + +.lr.ph: ; preds = %0 + br label %5 + +;