diff --git a/acpo/model-ai4cfh.acpo b/acpo/model-ai4cfh.acpo new file mode 100644 index 0000000000000000000000000000000000000000..7639ecd1b39323dc9f0400445c11eb4e3c8c1e01 --- /dev/null +++ b/acpo/model-ai4cfh.acpo @@ -0,0 +1,16 @@ +ModelName=AI4CFH +Features={BlockWithMultipleSuccecorsPerLoop, float32}, {PtrArgs, float32}, {MaxDomTreeLevel, float32}, {IsLinkOnceODR, float32}, {IsLocal, float32}, {Calls, float32}, {Blocks, float32}, {InitialSize, float32}, {MaxLoopDepth, float32}, {users, float32}, {InstructionPerBlock, float32}, {Loops, float32}, {conditionally_executed_blocks, float32}, {IsLinkOnce, float32}, {basic_block_count, float32}, {PtrCallee, float32}, {CallReturnPtr, float32}, {ConditionalBranch, float32}, {CBwithArg, float32}, {CallerHeight, float32}, {CallUsage, float32}, {IsRecursive, float32}, {NumCallsiteInLoop, float32}, {NumOfCallUsesInLoop, float32}, {EntryBlockFreq, float32}, {MaxCallsiteBlockFreq, float32}, {SuccessorPerBlock, float32}, {AvgVecInstr, float32}, {AvgNestedLoopLevel, float32}, {InstrPerLoop, float32} +Outputs={FH, int64} +Signature=serving_default +ModelDirectory=./models/ai4c-fh +OutputKey=output_0 +ModelInference= +# The ModelInference entry above needs to be updated on the Python side +ModelName=AI4CFH +Features={BlockWithMultipleSuccecorsPerLoop, float32}, {PtrArgs, float32}, {MaxDomTreeLevel, float32}, {IsLinkOnceODR, float32}, {IsLocal, float32}, {Calls, float32}, {Blocks, float32}, {InitialSize, float32}, {MaxLoopDepth, float32}, {users, float32}, {InstructionPerBlock, float32}, {Loops, float32}, {conditionally_executed_blocks, float32}, {IsLinkOnce, float32}, {basic_block_count, float32}, {PtrCallee, float32}, {CallReturnPtr, float32}, {ConditionalBranch, float32}, {CBwithArg, float32}, {CallerHeight, float32}, {CallUsage, float32}, {IsRecursive, float32}, {NumCallsiteInLoop, float32}, {NumOfCallUsesInLoop, float32}, {EntryBlockFreq, float32}, {MaxCallsiteBlockFreq, float32}, {SuccessorPerBlock, float32}, {AvgVecInstr, float32}, 
{AvgNestedLoopLevel, float32}, {InstrPerLoop, float32} +Outputs={FH, int64} +Signature=serving_default +ModelDirectory=./models/ai4c-fh +OutputKey=output_0 +ModelInference= +# The ModelInference entry above needs to be updated on the Python side diff --git a/acpo/model-ai4cmemop.acpo b/acpo/model-ai4cmemop.acpo new file mode 100644 index 0000000000000000000000000000000000000000..b60ba9a2a9bf10ff5b35ce82ceb58c3a70b84d17 --- /dev/null +++ b/acpo/model-ai4cmemop.acpo @@ -0,0 +1,8 @@ +ModelName=AI4CMEMOP +Features={BlockWithMultipleSuccecorsPerLoop, float32}, {PtrArgs, float32}, {MaxDomTreeLevel, float32}, {IsLinkOnceODR, float32}, {IsLocal, float32}, {Calls, float32}, {Blocks, float32}, {InitialSize, float32}, {MaxLoopDepth, float32}, {users, float32}, {InstructionPerBlock, float32}, {Loops, float32}, {conditionally_executed_blocks, float32}, {IsLinkOnce, float32}, {basic_block_count, float32}, {PtrCallee, float32}, {CallReturnPtr, float32}, {ConditionalBranch, float32}, {CBwithArg, float32}, {CallerHeight, float32}, {CallUsage, float32}, {IsRecursive, float32}, {NumCallsiteInLoop, float32}, {NumOfCallUsesInLoop, float32}, {EntryBlockFreq, float32}, {MaxCallsiteBlockFreq, float32}, {SuccessorPerBlock, float32}, {AvgVecInstr, float32}, {AvgNestedLoopLevel, float32}, {InstrPerLoop, float32}, {ends_with_branch, float32}, {ends_with_cond_branch, float32}, {ends_with_return, float32}, {ends_with_unreachable, float32}, {num_succs, float32}, {num_preds, float32}, {num_stores, float32}, {num_loads, float32}, {num_calls, float32}, {num_phis, float32}, {num_inst, float32}, {memop_type, float32}, {dst_align, float32}, {dst_from, float32}, {src_align, float32}, {src_from, float32}, {opt_size, float32} +Outputs={OPT, int64} +Signature=serving_default +ModelDirectory=./models/ai4c-memop +OutputKey=output_0 +ModelInference= +# The ModelInference entry above needs to be updated on the Python side diff --git a/acpo/model-bw.acpo b/acpo/model-bw.acpo new file mode 100644 index 
0000000000000000000000000000000000000000..20a35f94a81fa8c8780fc25eb6387ef2894f8e5e --- /dev/null +++ b/acpo/model-bw.acpo @@ -0,0 +1,7 @@ +ModelName=BW +Features={BlockWithMultipleSuccecorsPerLoop, float32},{PtrArgs, float32},{MaxDomTreeLevel, float32},{IsLinkOnceODR, float32},{IsLocal, float32},{Calls, float32},{Blocks, float32},{InitialSize, float32},{MaxLoopDepth, float32},{users, float32},{InstructionPerBlock, float32},{Loops, float32},{conditionally_executed_blocks, float32},{IsLinkOnce, float32},{basic_block_count, float32},{PtrCallee, float32},{CallReturnPtr, float32},{ConditionalBranch, float32},{CBwithArg, float32},{CallerHeight, float32},{CallUsage, float32},{IsRecursive, float32},{NumCallsiteInLoop, float32},{NumOfCallUsesInLoop, float32},{EntryBlockFreq, float32},{MaxCallsiteBlockFreq, float32},{SuccessorPerBlock, float32},{AvgVecInstr, float32},{AvgNestedLoopLevel, float32},{InstrPerLoop, float32},{num_successors, float32},{num_instrs, float32},{num_critical_edges, float32},{highest_num_instrs_in_succ, float32},{succ_num_with_highest_num_instrs, float32},{is_branch_inst, float32},{is_switch_inst, float32},{is_indirect_br_inst, float32},{is_invoke_inst, float32},{is_call_br_inst, float32},{is_second_succ_in_loop, float32},{is_first_succ_in_loop, float32},{is_bb_in_loop, float32},{is_iv_cmp, float32},{is_le_cmp, float32},{is_ge_cmp, float32},{is_lt_cmp, float32},{is_gt_cmp, float32},{is_ne_cmp, float32},{is_eq_cmp, float32},{is_second_op_constant, float32},{is_second_op_null, float32},{is_first_op_ptr, float32},{dest_num_successors, float32},{dest_num_instrs, float32},{dest_num_critical_edges, float32},{dest_is_branch_inst, float32},{dest_is_switch_inst, float32},{dest_is_indirect_br_inst, float32},{dest_is_invoke_inst, float32},{dest_is_call_br_inst, float32},{dest_succ_number, float32} +Outputs={BW-BranchWeight, int64} +Signature=serving_default +ModelDirectory=./models/ai4c-bw +OutputKey=output_0 +ModelInference=BWInference \ No newline at end of file 
diff --git a/acpo/models/ai4c-bw/BWCompiledModel-AARCH64.h b/acpo/models/ai4c-bw/BWCompiledModel-AARCH64.h
new file mode 100644
index 0000000000000000000000000000000000000000..7d2f1198000a0dfd82f5ae39e93354aba5e78872
--- /dev/null
+++ b/acpo/models/ai4c-bw/BWCompiledModel-AARCH64.h
@@ -0,0 +1,413 @@
+// Generated by tfcompile, the TensorFlow graph compiler. DO NOT EDIT!
+//
+// This header was generated via ahead-of-time compilation of a TensorFlow
+// graph. An object file corresponding to this header was also generated.
+// This header gives access to the functionality in that object file.
+//
+// clang-format off
+
+#ifndef TFCOMPILE_GENERATED__xla_BWCompiledModel_AARCH64_llvm_BWCompiledModel_H_ // NOLINT(build/header_guard)
+#define TFCOMPILE_GENERATED__xla_BWCompiledModel_AARCH64_llvm_BWCompiledModel_H_ // NOLINT(build/header_guard)
+
+
+
+#include "tensorflow/compiler/tf2xla/xla_compiled_cpu_function.h"
+#include "tensorflow/core/platform/types.h"
+
+namespace Eigen { struct ThreadPoolDevice; }
+namespace xla { class ExecutableRunOptions; }
+
+// (Implementation detail) Entry point to the function in the object file.
+extern "C" void _xla_BWCompiledModel_AARCH64_llvm_BWCompiledModel(
+    void* result, const ::xla::ExecutableRunOptions* run_options,
+    const void** args, void** temps, XlaCustomCallStatus* status,
+    int64_t* profile_counters);
+
+
+
+
+namespace llvm {
+
+// BWCompiledModel represents a computation previously specified in a
+// TensorFlow graph, now compiled into executable code. This extends the generic
+// XlaCompiledCpuFunction class with statically type-safe arg and result
+// methods. Usage example:
+//
+//   BWCompiledModel computation;
+//   // ...set args using computation.argN methods
+//   CHECK(computation.Run());
+//   // ...inspect results using computation.resultN methods
+//
+// The Run method invokes the actual computation, with inputs read from arg
+// buffers, and outputs written to result buffers. Each Run call may also use
+// a set of temporary buffers for the computation.
+//
+// By default each instance of this class manages its own arg, result and temp
+// buffers. The AllocMode constructor parameter may be used to modify the
+// buffer allocation strategy.
+//
+// Under the default allocation strategy, this class is thread-compatible:
+// o Calls to non-const methods require exclusive access to the object.
+// o Concurrent calls to const methods are OK, if those calls are made while it
+//   is guaranteed that no thread may call a non-const method.
+//
+// The logical function signature is:
+//   (arg0: f32[1,62]) -> (f32[1,1], f32[1,10,1])
+//
+// Memory stats:
+//   arg bytes total: 248
+//   arg bytes aligned: 256
+//   temp bytes total: 1084
+//   temp bytes aligned: 1216
+class BWCompiledModel final : public tensorflow::XlaCompiledCpuFunction {
+ public:
+  // Number of input arguments for the compiled computation.
+  static constexpr size_t kNumArgs = 1;
+
+  static constexpr size_t kNumResults = 2;  // Number of results of the compiled computation.
+
+  // Number of variables for the compiled computation.
+  static constexpr size_t kNumVariables = 0;
+
+  // Byte size of each argument buffer. There are kNumArgs entries.
+  static const ::int64_t ArgSize(::tensorflow::int32 index) {
+    return BufferInfos()[ArgIndexToBufferIndex()[index]].size();
+  }
+
+  // Returns static data used to create an XlaCompiledCpuFunction.
+  static const tensorflow::XlaCompiledCpuFunction::StaticData& StaticData() {
+    static XlaCompiledCpuFunction::StaticData* kStaticData = [](){
+      XlaCompiledCpuFunction::StaticData* data =
+        new XlaCompiledCpuFunction::StaticData;
+      set_static_data_raw_function(data, _xla_BWCompiledModel_AARCH64_llvm_BWCompiledModel);
+      set_static_data_buffer_infos(data, BufferInfos());
+      set_static_data_num_buffers(data, kNumBuffers);
+      set_static_data_result_index_table(data, ResultIndexToBufferIndex());
+      set_static_data_num_results(data, kNumResults);
+      set_static_data_arg_index_table(data, ArgIndexToBufferIndex());
+      set_static_data_num_args(data, kNumArgs);
+      set_static_data_num_variables(data, kNumVariables);
+      set_static_data_result_index(data, kResultIndex);
+      set_static_data_arg_shape_infos(data, ArgShapeInfos());
+      set_static_data_result_shape_infos(data, ResultShapeInfos());
+      set_static_data_arg_names(data, StaticArgNames());
+      set_static_data_variable_names(data, StaticVariableNames());
+      set_static_data_result_names(data, StaticResultNames());
+      set_static_data_program_shape(data, StaticProgramShape());
+      set_static_data_hlo_profile_printer_data(
+          data, StaticHloProfilePrinterData());
+      set_static_data_use_xla_runtime(data, false);
+
+      return data;
+    }();
+    return *kStaticData;
+  }
+
+  BWCompiledModel(AllocMode alloc_mode =
+            AllocMode::ARGS_VARIABLES_RESULTS_PROFILES_AND_TEMPS)
+      : XlaCompiledCpuFunction(StaticData(), alloc_mode) {}
+
+  BWCompiledModel(const BWCompiledModel&) = delete;
+  BWCompiledModel& operator=(const BWCompiledModel&) = delete;
+
+  // Arg methods for managing input buffers. Buffers are in row-major order.
+  // There is a set of methods for each positional argument, with the following
+  // general form:
+  //
+  // void set_argN_data(void* data)
+  //   Sets the buffer of type T for positional argument N. May be called in
+  //   any AllocMode. Must be called before Run to have an effect. Must be
+  //   called in AllocMode::RESULTS_PROFILES_AND_TEMPS_ONLY for each positional
+  //   argument, to set the argument buffers.
+  //
+  // T* argN_data()
+  //   Returns the buffer of type T for positional argument N.
+  //
+  // T& argN(...dim indices...)
+  //   Returns a reference to the value of type T for positional argument N,
+  //   with dim indices specifying which value. No bounds checking is performed
+  //   on dim indices.
+
+  void set_arg0_data(const void* data) {
+    set_arg_data(0, data);
+  }
+  float* arg0_data() {
+    return static_cast<float*>(arg_data(0));
+  }
+  float& arg0(size_t dim0, size_t dim1) {
+    return (*static_cast<float(*)[1][62]>(
+        arg_data(0)))[dim0][dim1];
+  }
+  const float* arg0_data() const {
+    return static_cast<const float*>(arg_data(0));
+  }
+  const float& arg0(size_t dim0, size_t dim1) const {
+    return (*static_cast<const float(*)[1][62]>(
+        arg_data(0)))[dim0][dim1];
+  }
+  int arg0_size() const {
+    return 62 * sizeof(float);
+  }
+  int arg0_count() const {
+    return 62;
+  }
+
+  void set_arg_feed_input_1_data(const void* data) {
+    set_arg_data(0, data);
+  }
+  float* arg_feed_input_1_data() {
+    return static_cast<float*>(arg_data(0));
+  }
+  float& arg_feed_input_1(size_t dim0, size_t dim1) {
+    return (*static_cast<float(*)[1][62]>(
+        arg_data(0)))[dim0][dim1];
+  }
+  const float* arg_feed_input_1_data() const {
+    return static_cast<const float*>(arg_data(0));
+  }
+  const float& arg_feed_input_1(size_t dim0, size_t dim1) const {
+    return (*static_cast<const float(*)[1][62]>(
+        arg_data(0)))[dim0][dim1];
+  }
+  int arg_feed_input_1_size() const {
+    return 62 * sizeof(float);
+  }
+  int arg_feed_input_1_count() const {
+    return 62;
+  }
+
+  // Result methods for managing output buffers. Buffers are in row-major order.
+  // Must only be called after a successful Run call. There is a set of methods
+  // for each positional result, with the following general form:
+  //
+  // T* resultN_data()
+  //   Returns the buffer of type T for positional result N.
+  //
+  // T& resultN(...dim indices...)
+  //   Returns a reference to the value of type T for positional result N,
+  //   with dim indices specifying which value. No bounds checking is performed
+  //   on dim indices.
+  //
+  // Unlike the arg methods, there is no set_resultN_data method. The result
+  // buffers are managed internally, and may change after each call to Run.
+
+  float* result0_data() {
+    return static_cast<float*>(result_data(0));
+  }
+  float& result0(size_t dim0, size_t dim1) {
+    return (*static_cast<float(*)[1][1]>(
+        result_data(0)))[dim0][dim1];
+  }
+  const float* result0_data() const {
+    return static_cast<const float*>(result_data(0));
+  }
+  const float& result0(size_t dim0, size_t dim1) const {
+    return (*static_cast<const float(*)[1][1]>(
+        result_data(0)))[dim0][dim1];
+  }
+  int result0_size() const {
+    return 1 * sizeof(float);
+  }
+  int result0_count() const {
+    return 1;
+  }
+
+  float* result_fetch_output_1_data() {
+    return static_cast<float*>(result_data(0));
+  }
+  float& result_fetch_output_1(size_t dim0, size_t dim1) {
+    return (*static_cast<float(*)[1][1]>(
+        result_data(0)))[dim0][dim1];
+  }
+  const float* result_fetch_output_1_data() const {
+    return static_cast<const float*>(result_data(0));
+  }
+  const float& result_fetch_output_1(size_t dim0, size_t dim1) const {
+    return (*static_cast<const float(*)[1][1]>(
+        result_data(0)))[dim0][dim1];
+  }
+  int result_fetch_output_1_size() const {
+    return 1 * sizeof(float);
+  }
+  int result_fetch_output_1_count() const {
+    return 1;
+  }
+
+  float* result1_data() {
+    return static_cast<float*>(result_data(1));
+  }
+  float& result1(size_t dim0, size_t dim1, size_t dim2) {
+    return (*static_cast<float(*)[1][10][1]>(
+        result_data(1)))[dim0][dim1][dim2];
+  }
+  const float* result1_data() const {
+    return static_cast<const float*>(result_data(1));
+  }
+  const float& result1(size_t dim0, size_t dim1, size_t dim2) const {
+    return (*static_cast<const float(*)[1][10][1]>(
+        result_data(1)))[dim0][dim1][dim2];
+  }
+  int result1_size() const {
+    return 10 * sizeof(float);
+  }
+  int result1_count() const {
+    return 10;
+  }
+
+  float* result_fetch_output_0_data() {
+    return static_cast<float*>(result_data(1));
+  }
+  float& result_fetch_output_0(size_t dim0, size_t dim1, size_t dim2) {
+    return (*static_cast<float(*)[1][10][1]>(
+        result_data(1)))[dim0][dim1][dim2];
+  }
+  const float* result_fetch_output_0_data() const {
+    return static_cast<const float*>(result_data(1));
+  }
+  const float& result_fetch_output_0(size_t dim0, size_t dim1, size_t dim2) const {
+    return (*static_cast<const float(*)[1][10][1]>(
+        result_data(1)))[dim0][dim1][dim2];
+  }
+  int result_fetch_output_0_size() const {
+    return 10 * sizeof(float);
+  }
+  int result_fetch_output_0_count() const {
+    return 10;
+  }
+
+  // Methods for managing variable buffers. Buffers are in row-major order.
+  //
+  // For read-write variables we generate the following methods:
+  //
+  // void set_var_X_data(T* data)
+  //   Sets the buffer for variable X. Must be called before Run if the
+  //   allocation mode is RESULTS_PROFILES_AND_TEMPS_ONLY.
+  //
+  // T* var_X_data()
+  //   Returns the buffer of type T for variable X. If the allocation mode is
+  //   RESULTS_PROFILES_AND_TEMPS_ONLY then this buffer is the same as the
+  //   buffer passed to set_var_X_data.
+  //
+  // T& var_X(...dim indices...)
+  //   Returns a reference to the value of type T for variable X,
+  //   with dim indices specifying which value. No bounds checking is performed
+  //   on dim indices.
+  //
+  // For readonly variables we generate the same set of methods, except that we
+  // use `const T` instead of `T`. We use `const T` to avoid erasing the
+  // constness of the buffer passed to `set_var_X_data` but the underlying
+  // buffer is not const (and thus the const can be safely const-cast'ed away)
+  // unless `set_var_X_data` is called with a pointer to constant storage.
+
+ private:
+  // Number of buffers for the compiled computation.
+  static constexpr size_t kNumBuffers = 21;
+
+  static const ::xla::cpu_function_runtime::BufferInfo* BufferInfos() {
+    static const ::xla::cpu_function_runtime::BufferInfo
+      kBufferInfos[kNumBuffers] = {
+::xla::cpu_function_runtime::BufferInfo(::xla::cpu_function_runtime::EncodedBufferInfo{131072ULL, ~0U, ~0U}),
+::xla::cpu_function_runtime::BufferInfo(::xla::cpu_function_runtime::EncodedBufferInfo{126976ULL, ~0U, ~0U}),
+::xla::cpu_function_runtime::BufferInfo(::xla::cpu_function_runtime::EncodedBufferInfo{32768ULL, ~0U, ~0U}),
+::xla::cpu_function_runtime::BufferInfo(::xla::cpu_function_runtime::EncodedBufferInfo{8192ULL, ~0U, ~0U}),
+::xla::cpu_function_runtime::BufferInfo(::xla::cpu_function_runtime::EncodedBufferInfo{4096ULL, ~0U, ~0U}),
+::xla::cpu_function_runtime::BufferInfo(::xla::cpu_function_runtime::EncodedBufferInfo{4096ULL, ~0U, ~0U}),
+::xla::cpu_function_runtime::BufferInfo(::xla::cpu_function_runtime::EncodedBufferInfo{2560ULL, ~0U, ~0U}),
+::xla::cpu_function_runtime::BufferInfo(::xla::cpu_function_runtime::EncodedBufferInfo{2048ULL, ~0U, ~0U}),
+::xla::cpu_function_runtime::BufferInfo(::xla::cpu_function_runtime::EncodedBufferInfo{1024ULL, ~0U, ~0U}),
+::xla::cpu_function_runtime::BufferInfo(::xla::cpu_function_runtime::EncodedBufferInfo{994ULL, 0U, ~0U}),
+::xla::cpu_function_runtime::BufferInfo(::xla::cpu_function_runtime::EncodedBufferInfo{512ULL, ~0U, ~0U}),
+::xla::cpu_function_runtime::BufferInfo(::xla::cpu_function_runtime::EncodedBufferInfo{256ULL, ~0U, ~0U}),
+::xla::cpu_function_runtime::BufferInfo(::xla::cpu_function_runtime::EncodedBufferInfo{256ULL, ~0U, ~0U}),
+::xla::cpu_function_runtime::BufferInfo(::xla::cpu_function_runtime::EncodedBufferInfo{256ULL, ~0U, ~0U}),
+::xla::cpu_function_runtime::BufferInfo(::xla::cpu_function_runtime::EncodedBufferInfo{256ULL, ~0U, ~0U}),
+::xla::cpu_function_runtime::BufferInfo(::xla::cpu_function_runtime::EncodedBufferInfo{161ULL, ~0U, 1U}),
+::xla::cpu_function_runtime::BufferInfo(::xla::cpu_function_runtime::EncodedBufferInfo{160ULL, ~0U, ~0U}),
+::xla::cpu_function_runtime::BufferInfo(::xla::cpu_function_runtime::EncodedBufferInfo{65ULL, ~0U, ~0U}),
+::xla::cpu_function_runtime::BufferInfo(::xla::cpu_function_runtime::EncodedBufferInfo{17ULL, ~0U, 0U}),
+::xla::cpu_function_runtime::BufferInfo(::xla::cpu_function_runtime::EncodedBufferInfo{16ULL, ~0U, ~0U}),
+::xla::cpu_function_runtime::BufferInfo(::xla::cpu_function_runtime::EncodedBufferInfo{4097ULL, ~0U, ~0U})
+      };
+    return kBufferInfos;
+  }
+
+  static const ::tensorflow::int32* ResultIndexToBufferIndex() {
+    static constexpr ::tensorflow::int32 kResultIndexToBufferIndex[kNumResults] = {
+18, 15
+    };
+    return kResultIndexToBufferIndex;
+  }
+
+  static const ::tensorflow::int32* ArgIndexToBufferIndex() {
+    static constexpr ::tensorflow::int32 kArgIndexToBufferIndex[kNumArgs] = {
+9
+    };
+    return kArgIndexToBufferIndex;
+  }
+
+  // The 0-based index of the result tuple in the temporary buffers.
+  static constexpr size_t kResultIndex = 17;
+
+  // Shapes of the input arguments.
+  static constexpr int32_t kArg0Shapes[] = {
+1, 62
+  };
+  static const ShapeInfo* ArgShapeInfos() {
+    static constexpr ShapeInfo kArgShapeInfoTable[kNumArgs] = {
+{ kArg0Shapes, 2 },
+    };
+    return kArgShapeInfoTable;
+  };
+
+  // Shapes of the results.
+  static constexpr int32_t kResult0Shapes[] = {
+1, 1
+  };
+  static constexpr int32_t kResult1Shapes[] = {
+1, 10, 1
+  };
+  static const ShapeInfo* ResultShapeInfos() {
+    static constexpr ShapeInfo kResultShapeInfoTable[kNumResults] = {
+{ kResult0Shapes, 2 },
+{ kResult1Shapes, 3 },
+    };
+    return kResultShapeInfoTable;
+  };
+
+  // Array of names of each positional argument, terminated by nullptr.
+  static const char** StaticArgNames() {
+    static const char* kNames[] = {"feed_input_1", nullptr};
+    return kNames;
+  }
+
+  // Array of names of each positional variable, terminated by nullptr.
+  static const char** StaticVariableNames() {
+    static const char* kNames[] = {nullptr};
+    return kNames;
+  }
+
+  // Array of names of each positional result, terminated by nullptr.
+  static const char** StaticResultNames() {
+    static const char* kNames[] = {"fetch_output_1", "fetch_output_0", nullptr};
+    return kNames;
+  }
+
+  // Shape of the args and results.
+  static const ::xla::ProgramShapeProto* StaticProgramShape() {
+    static const ::xla::ProgramShapeProto* kShape = nullptr;
+    return kShape;
+  }
+
+  // Metadata that can be used to pretty-print profile counters.
+  static const ::xla::HloProfilePrinterData* StaticHloProfilePrinterData() {
+    static const ::xla::HloProfilePrinterData* kHloProfilePrinterData =
+      nullptr;
+    return kHloProfilePrinterData;
+  }
+};
+
+} // end namespace llvm
+
+#endif // TFCOMPILE_GENERATED__xla_BWCompiledModel_AARCH64_llvm_BWCompiledModel_H_
+
+// clang-format on
diff --git a/acpo/models/ai4c-bw/BWCompiledModel-AARCH64.o b/acpo/models/ai4c-bw/BWCompiledModel-AARCH64.o
new file mode 100644
index 0000000000000000000000000000000000000000..1159accd7e26bf8e81567e38f8ff15a7da7306c0
Binary files /dev/null and b/acpo/models/ai4c-bw/BWCompiledModel-AARCH64.o differ
diff --git a/acpo/models/ai4c-bw/BWCompiledModel-AARCH64_metadata.o b/acpo/models/ai4c-bw/BWCompiledModel-AARCH64_metadata.o
new file mode 100644
index 0000000000000000000000000000000000000000..cc02d5395c9cbe40bd5f9160f354848878b08721
Binary files /dev/null and b/acpo/models/ai4c-bw/BWCompiledModel-AARCH64_metadata.o differ
diff --git a/acpo/models/ai4c-bw/saved_model.pb b/acpo/models/ai4c-bw/saved_model.pb
new file mode 100644
index 0000000000000000000000000000000000000000..1b357fa46465cbe9226ebc5c83f51c93e5863db3
Binary files /dev/null and b/acpo/models/ai4c-bw/saved_model.pb differ
diff --git a/acpo/models/ai4c-bw/sc.pkl b/acpo/models/ai4c-bw/sc.pkl
new file mode 100644
index 0000000000000000000000000000000000000000..71f2f78ef9e44c61f8f8fc4a3b179b970ef853af
Binary files
/dev/null and b/acpo/models/ai4c-bw/sc.pkl differ diff --git a/acpo/models/ai4c-bw/variables/variables.data-00000-of-00001 b/acpo/models/ai4c-bw/variables/variables.data-00000-of-00001 new file mode 100644 index 0000000000000000000000000000000000000000..75c0b74a789b88b033a473fb3470c987075e1065 Binary files /dev/null and b/acpo/models/ai4c-bw/variables/variables.data-00000-of-00001 differ diff --git a/acpo/models/ai4c-bw/variables/variables.index b/acpo/models/ai4c-bw/variables/variables.index new file mode 100644 index 0000000000000000000000000000000000000000..6d9e73c1d27edda44a04315e61a820329a5374a8 Binary files /dev/null and b/acpo/models/ai4c-bw/variables/variables.index differ diff --git a/acpo/models/ai4c-fh/AI4CFHCompiledModel-AARCH64.h b/acpo/models/ai4c-fh/AI4CFHCompiledModel-AARCH64.h new file mode 100644 index 0000000000000000000000000000000000000000..f854200380637541cadc8d0b359ad97aa0c6b859 --- /dev/null +++ b/acpo/models/ai4c-fh/AI4CFHCompiledModel-AARCH64.h @@ -0,0 +1,358 @@ +// Generated by tfcompile, the TensorFlow graph compiler. DO NOT EDIT! +// +// This header was generated via ahead-of-time compilation of a TensorFlow +// graph. An object file corresponding to this header was also generated. +// This header gives access to the functionality in that object file. +// +// clang-format off + +#ifndef TFCOMPILE_GENERATED__xla_AI4CFHCompiledModel_AARCH64_llvm_AI4CFHCompiledModel_H_ // NOLINT(build/header_guard) +#define TFCOMPILE_GENERATED__xla_AI4CFHCompiledModel_AARCH64_llvm_AI4CFHCompiledModel_H_ // NOLINT(build/header_guard) + + + +#include "tensorflow/compiler/tf2xla/xla_compiled_cpu_function.h" +#include "tensorflow/core/platform/types.h" + +namespace Eigen { struct ThreadPoolDevice; } +namespace xla { class ExecutableRunOptions; } + +// (Implementation detail) Entry point to the function in the object file. 
+extern "C" void _xla_AI4CFHCompiledModel_AARCH64_llvm_AI4CFHCompiledModel( + void* result, const ::xla::ExecutableRunOptions* run_options, + const void** args, void** temps, XlaCustomCallStatus* status, + int64_t* profile_counters); + + + + +namespace llvm { + +// AI4CFHCompiledModel represents a computation previously specified in a +// TensorFlow graph, now compiled into executable code. This extends the generic +// XlaCompiledCpuFunction class with statically type-safe arg and result +// methods. Usage example: +// +// AI4CFHCompiledModel computation; +// // ...set args using computation.argN methods +// CHECK(computation.Run()); +// // ...inspect results using computation.resultN methods +// +// The Run method invokes the actual computation, with inputs read from arg +// buffers, and outputs written to result buffers. Each Run call may also use +// a set of temporary buffers for the computation. +// +// By default each instance of this class manages its own arg, result and temp +// buffers. The AllocMode constructor parameter may be used to modify the +// buffer allocation strategy. +// +// Under the default allocation strategy, this class is thread-compatible: +// o Calls to non-const methods require exclusive access to the object. +// o Concurrent calls to const methods are OK, if those calls are made while it +// is guaranteed that no thread may call a non-const method. +// +// The logical function signature is: +// (arg0: f32[1,30]) -> (f32[1,3]) +// +// Memory stats: +// arg bytes total: 120 +// arg bytes aligned: 128 +// temp bytes total: 8212 +// temp bytes aligned: 8320 +class AI4CFHCompiledModel final : public tensorflow::XlaCompiledCpuFunction { + public: + // Number of input arguments for the compiled computation. + static constexpr size_t kNumArgs = 1; + + static constexpr size_t kNumResults = 1; + + // Number of variables for the compiled computation. + static constexpr size_t kNumVariables = 0; + + // Byte size of each argument buffer. 
There are kNumArgs entries. + static const ::int64_t ArgSize(::tensorflow::int32 index) { + return BufferInfos()[ArgIndexToBufferIndex()[index]].size(); + } + + // Returns static data used to create an XlaCompiledCpuFunction. + static const tensorflow::XlaCompiledCpuFunction::StaticData& StaticData() { + static XlaCompiledCpuFunction::StaticData* kStaticData = [](){ + XlaCompiledCpuFunction::StaticData* data = + new XlaCompiledCpuFunction::StaticData; + set_static_data_raw_function(data, _xla_AI4CFHCompiledModel_AARCH64_llvm_AI4CFHCompiledModel); + set_static_data_buffer_infos(data, BufferInfos()); + set_static_data_num_buffers(data, kNumBuffers); + set_static_data_result_index_table(data, ResultIndexToBufferIndex()); + set_static_data_num_results(data, kNumResults); + set_static_data_arg_index_table(data, ArgIndexToBufferIndex()); + set_static_data_num_args(data, kNumArgs); + set_static_data_num_variables(data, kNumVariables); + set_static_data_result_index(data, kResultIndex); + set_static_data_arg_shape_infos(data, ArgShapeInfos()); + set_static_data_result_shape_infos(data, ResultShapeInfos()); + set_static_data_arg_names(data, StaticArgNames()); + set_static_data_variable_names(data, StaticVariableNames()); + set_static_data_result_names(data, StaticResultNames()); + set_static_data_program_shape(data, StaticProgramShape()); + set_static_data_hlo_profile_printer_data( + data, StaticHloProfilePrinterData()); + set_static_data_use_xla_runtime(data, false); + + return data; + }(); + return *kStaticData; + } + + AI4CFHCompiledModel(AllocMode alloc_mode = + AllocMode::ARGS_VARIABLES_RESULTS_PROFILES_AND_TEMPS) + : XlaCompiledCpuFunction(StaticData(), alloc_mode) {} + + AI4CFHCompiledModel(const AI4CFHCompiledModel&) = delete; + AI4CFHCompiledModel& operator=(const AI4CFHCompiledModel&) = delete; + + // Arg methods for managing input buffers. Buffers are in row-major order. 
+ // There is a set of methods for each positional argument, with the following + // general form: + // + // void set_argN_data(void* data) + // Sets the buffer of type T for positional argument N. May be called in + // any AllocMode. Must be called before Run to have an effect. Must be + // called in AllocMode::RESULTS_PROFILES_AND_TEMPS_ONLY for each positional + // argument, to set the argument buffers. + // + // T* argN_data() + // Returns the buffer of type T for positional argument N. + // + // T& argN(...dim indices...) + // Returns a reference to the value of type T for positional argument N, + // with dim indices specifying which value. No bounds checking is performed + // on dim indices. + + void set_arg0_data(const void* data) { + set_arg_data(0, data); + } + float* arg0_data() { + return static_cast(arg_data(0)); + } + float& arg0(size_t dim0, size_t dim1) { + return (*static_cast( + arg_data(0)))[dim0][dim1]; + } + const float* arg0_data() const { + return static_cast(arg_data(0)); + } + const float& arg0(size_t dim0, size_t dim1) const { + return (*static_cast( + arg_data(0)))[dim0][dim1]; + } + int arg0_size() const { + return 30 * sizeof(float); + } + int arg0_count() const { + return 30; + } + + void set_arg_feed_input_1_data(const void* data) { + set_arg_data(0, data); + } + float* arg_feed_input_1_data() { + return static_cast(arg_data(0)); + } + float& arg_feed_input_1(size_t dim0, size_t dim1) { + return (*static_cast( + arg_data(0)))[dim0][dim1]; + } + const float* arg_feed_input_1_data() const { + return static_cast(arg_data(0)); + } + const float& arg_feed_input_1(size_t dim0, size_t dim1) const { + return (*static_cast( + arg_data(0)))[dim0][dim1]; + } + int arg_feed_input_1_size() const { + return 30 * sizeof(float); + } + int arg_feed_input_1_count() const { + return 30; + } + + // Result methods for managing output buffers. Buffers are in row-major order. + // Must only be called after a successful Run call. 
There is a set of methods + // for each positional result, with the following general form: + // + // T* resultN_data() + // Returns the buffer of type T for positional result N. + // + // T& resultN(...dim indices...) + // Returns a reference to the value of type T for positional result N, + // with dim indices specifying which value. No bounds checking is performed + // on dim indices. + // + // Unlike the arg methods, there is no set_resultN_data method. The result + // buffers are managed internally, and may change after each call to Run. + + float* result0_data() { + return static_cast(result_data(0)); + } + float& result0(size_t dim0, size_t dim1) { + return (*static_cast( + result_data(0)))[dim0][dim1]; + } + const float* result0_data() const { + return static_cast(result_data(0)); + } + const float& result0(size_t dim0, size_t dim1) const { + return (*static_cast( + result_data(0)))[dim0][dim1]; + } + int result0_size() const { + return 3 * sizeof(float); + } + int result0_count() const { + return 3; + } + + float* result_fetch_output_0_data() { + return static_cast(result_data(0)); + } + float& result_fetch_output_0(size_t dim0, size_t dim1) { + return (*static_cast( + result_data(0)))[dim0][dim1]; + } + const float* result_fetch_output_0_data() const { + return static_cast(result_data(0)); + } + const float& result_fetch_output_0(size_t dim0, size_t dim1) const { + return (*static_cast( + result_data(0)))[dim0][dim1]; + } + int result_fetch_output_0_size() const { + return 3 * sizeof(float); + } + int result_fetch_output_0_count() const { + return 3; + } + + // Methods for managing variable buffers. Buffers are in row-major order. + // + // For read-write variables we generate the following methods: + // + // void set_var_X_data(T* data) + // Sets the buffer for variable X. Must be called before Run if the + // allocation mode is RESULTS_PROFILES_AND_TEMPS_ONLY. + // + // T* var_X_data() + // Returns the buffer of type T for variable X. 
If the allocation mode is + // RESULTS_PROFILES_AND_TEMPS_ONLY then this buffer is the same as the + // buffer passed to set_var_X_data. + // + // T& var_X(...dim indices...) + // Returns a reference to the value of type T for variable X, + // with dim indices specifying which value. No bounds checking is performed + // on dim indices. + // + // For readonly variables we generate the same set of methods, except that we + // use `const T` instead of `T`. We use `const T` to avoid erasing the + // constness of the buffer passed to `set_var_X_data` but the underlying + // buffer is not const (and thus the const can be safely const-cast'ed away) + // unless `set_var_X_data` is called with a pointer to constant storage. + + private: + // Number of buffers for the compiled computation. + static constexpr size_t kNumBuffers = 12; + + static const ::xla::cpu_function_runtime::BufferInfo* BufferInfos() { + static const ::xla::cpu_function_runtime::BufferInfo + kBufferInfos[kNumBuffers] = { +::xla::cpu_function_runtime::BufferInfo(::xla::cpu_function_runtime::EncodedBufferInfo{16777216ULL, ~0U, ~0U}), +::xla::cpu_function_runtime::BufferInfo(::xla::cpu_function_runtime::EncodedBufferInfo{491520ULL, ~0U, ~0U}), +::xla::cpu_function_runtime::BufferInfo(::xla::cpu_function_runtime::EncodedBufferInfo{212992ULL, ~0U, ~0U}), +::xla::cpu_function_runtime::BufferInfo(::xla::cpu_function_runtime::EncodedBufferInfo{16384ULL, ~0U, ~0U}), +::xla::cpu_function_runtime::BufferInfo(::xla::cpu_function_runtime::EncodedBufferInfo{16384ULL, ~0U, ~0U}), +::xla::cpu_function_runtime::BufferInfo(::xla::cpu_function_runtime::EncodedBufferInfo{624ULL, ~0U, ~0U}), +::xla::cpu_function_runtime::BufferInfo(::xla::cpu_function_runtime::EncodedBufferInfo{482ULL, 0U, ~0U}), +::xla::cpu_function_runtime::BufferInfo(::xla::cpu_function_runtime::EncodedBufferInfo{208ULL, ~0U, ~0U}), +::xla::cpu_function_runtime::BufferInfo(::xla::cpu_function_runtime::EncodedBufferInfo{49ULL, ~0U, 0U}), 
+::xla::cpu_function_runtime::BufferInfo(::xla::cpu_function_runtime::EncodedBufferInfo{48ULL, ~0U, ~0U}), +::xla::cpu_function_runtime::BufferInfo(::xla::cpu_function_runtime::EncodedBufferInfo{33ULL, ~0U, ~0U}), +::xla::cpu_function_runtime::BufferInfo(::xla::cpu_function_runtime::EncodedBufferInfo{32769ULL, ~0U, ~0U}) + }; + return kBufferInfos; + } + + static const ::tensorflow::int32* ResultIndexToBufferIndex() { + static constexpr ::tensorflow::int32 kResultIndexToBufferIndex[kNumResults] = { +8 + }; + return kResultIndexToBufferIndex; + } + + static const ::tensorflow::int32* ArgIndexToBufferIndex() { + static constexpr ::tensorflow::int32 kArgIndexToBufferIndex[kNumArgs] = { +6 + }; + return kArgIndexToBufferIndex; + } + + // The 0-based index of the result tuple in the temporary buffers. + static constexpr size_t kResultIndex = 10; + + // Shapes of the input arguments. + static constexpr int32_t kArg0Shapes[] = { +1, 30 + }; + static const ShapeInfo* ArgShapeInfos() { + static constexpr ShapeInfo kArgShapeInfoTable[kNumArgs] = { +{ kArg0Shapes, 2 }, + }; + return kArgShapeInfoTable; + }; + + // Shapes of the results. + static constexpr int32_t kResult0Shapes[] = { +1, 3 + }; + static const ShapeInfo* ResultShapeInfos() { + static constexpr ShapeInfo kResultShapeInfoTable[kNumResults] = { +{ kResult0Shapes, 2 }, + }; + return kResultShapeInfoTable; + }; + + // Array of names of each positional argument, terminated by nullptr. + static const char** StaticArgNames() { + static const char* kNames[] = {"feed_input_1", nullptr}; + return kNames; + } + + // Array of names of each positional variable, terminated by nullptr. + static const char** StaticVariableNames() { + static const char* kNames[] = {nullptr}; + return kNames; + } + + // Array of names of each positional result, terminated by nullptr. + static const char** StaticResultNames() { + static const char* kNames[] = {"fetch_output_0", nullptr}; + return kNames; + } + + // Shape of the args and results. 
+ static const ::xla::ProgramShapeProto* StaticProgramShape() { + static const ::xla::ProgramShapeProto* kShape = nullptr; + return kShape; + } + + // Metadata that can be used to pretty-print profile counters. + static const ::xla::HloProfilePrinterData* StaticHloProfilePrinterData() { + static const ::xla::HloProfilePrinterData* kHloProfilePrinterData = + nullptr; + return kHloProfilePrinterData; + } +}; + +} // end namespace llvm + +#endif // TFCOMPILE_GENERATED__xla_AI4CFHCompiledModel_AARCH64_llvm_AI4CFHCompiledModel_H_ + +// clang-format on diff --git a/acpo/models/ai4c-fh/AI4CFHCompiledModel-AARCH64.o b/acpo/models/ai4c-fh/AI4CFHCompiledModel-AARCH64.o new file mode 100644 index 0000000000000000000000000000000000000000..1efb5ffcc028c9033aa9e748804091da5a8ff739 Binary files /dev/null and b/acpo/models/ai4c-fh/AI4CFHCompiledModel-AARCH64.o differ diff --git a/acpo/models/ai4c-fh/AI4CFHCompiledModel-AARCH64_metadata.o b/acpo/models/ai4c-fh/AI4CFHCompiledModel-AARCH64_metadata.o new file mode 100644 index 0000000000000000000000000000000000000000..cc02d5395c9cbe40bd5f9160f354848878b08721 Binary files /dev/null and b/acpo/models/ai4c-fh/AI4CFHCompiledModel-AARCH64_metadata.o differ diff --git a/acpo/models/ai4c-fh/saved_model.pb b/acpo/models/ai4c-fh/saved_model.pb new file mode 100644 index 0000000000000000000000000000000000000000..8702645175054beff35ef7b6eab77deec6113c21 Binary files /dev/null and b/acpo/models/ai4c-fh/saved_model.pb differ diff --git a/acpo/models/ai4c-fh/sc.pkl b/acpo/models/ai4c-fh/sc.pkl new file mode 100644 index 0000000000000000000000000000000000000000..72e7283b912227d186d99dd9ef8519c78b4552bb Binary files /dev/null and b/acpo/models/ai4c-fh/sc.pkl differ diff --git a/acpo/models/ai4c-fh/variables/variables.data-00000-of-00001 b/acpo/models/ai4c-fh/variables/variables.data-00000-of-00001 new file mode 100644 index 0000000000000000000000000000000000000000..84f6bd73d7523bd92d84365ab9b26974b423c9c3 Binary files /dev/null and 
b/acpo/models/ai4c-fh/variables/variables.data-00000-of-00001 differ diff --git a/acpo/models/ai4c-fh/variables/variables.index b/acpo/models/ai4c-fh/variables/variables.index new file mode 100644 index 0000000000000000000000000000000000000000..dcdb372b626ab6efee8c1042f66bb1756773e96b Binary files /dev/null and b/acpo/models/ai4c-fh/variables/variables.index differ diff --git a/acpo/models/ai4c-memop/AI4CMEMOPCompiledModel-AARCH64.h b/acpo/models/ai4c-memop/AI4CMEMOPCompiledModel-AARCH64.h new file mode 100644 index 0000000000000000000000000000000000000000..fb8439a07f79777efdb75e7ce7b8976a9bfe3de0 --- /dev/null +++ b/acpo/models/ai4c-memop/AI4CMEMOPCompiledModel-AARCH64.h @@ -0,0 +1,358 @@ +// Generated by tfcompile, the TensorFlow graph compiler. DO NOT EDIT! +// +// This header was generated via ahead-of-time compilation of a TensorFlow +// graph. An object file corresponding to this header was also generated. +// This header gives access to the functionality in that object file. +// +// clang-format off + +#ifndef TFCOMPILE_GENERATED__xla_AI4CMEMOPCompiledModel_AARCH64_llvm_AI4CMEMOPCompiledModel_H_ // NOLINT(build/header_guard) +#define TFCOMPILE_GENERATED__xla_AI4CMEMOPCompiledModel_AARCH64_llvm_AI4CMEMOPCompiledModel_H_ // NOLINT(build/header_guard) + + + +#include "tensorflow/compiler/tf2xla/xla_compiled_cpu_function.h" +#include "tensorflow/core/platform/types.h" + +namespace Eigen { struct ThreadPoolDevice; } +namespace xla { class ExecutableRunOptions; } + +// (Implementation detail) Entry point to the function in the object file. +extern "C" void _xla_AI4CMEMOPCompiledModel_AARCH64_llvm_AI4CMEMOPCompiledModel( + void* result, const ::xla::ExecutableRunOptions* run_options, + const void** args, void** temps, XlaCustomCallStatus* status, + int64_t* profile_counters); + + + + +namespace llvm { + +// AI4CMEMOPCompiledModel represents a computation previously specified in a +// TensorFlow graph, now compiled into executable code. 
This extends the generic +// XlaCompiledCpuFunction class with statically type-safe arg and result +// methods. Usage example: +// +// AI4CMEMOPCompiledModel computation; +// // ...set args using computation.argN methods +// CHECK(computation.Run()); +// // ...inspect results using computation.resultN methods +// +// The Run method invokes the actual computation, with inputs read from arg +// buffers, and outputs written to result buffers. Each Run call may also use +// a set of temporary buffers for the computation. +// +// By default each instance of this class manages its own arg, result and temp +// buffers. The AllocMode constructor parameter may be used to modify the +// buffer allocation strategy. +// +// Under the default allocation strategy, this class is thread-compatible: +// o Calls to non-const methods require exclusive access to the object. +// o Concurrent calls to const methods are OK, if those calls are made while it +// is guaranteed that no thread may call a non-const method. +// +// The logical function signature is: +// (arg0: f32[1,47]) -> (f32[1,2]) +// +// Memory stats: +// arg bytes total: 188 +// arg bytes aligned: 192 +// temp bytes total: 8208 +// temp bytes aligned: 8320 +class AI4CMEMOPCompiledModel final : public tensorflow::XlaCompiledCpuFunction { + public: + // Number of input arguments for the compiled computation. + static constexpr size_t kNumArgs = 1; + + static constexpr size_t kNumResults = 1; + + // Number of variables for the compiled computation. + static constexpr size_t kNumVariables = 0; + + // Byte size of each argument buffer. There are kNumArgs entries. + static const ::int64_t ArgSize(::tensorflow::int32 index) { + return BufferInfos()[ArgIndexToBufferIndex()[index]].size(); + } + + // Returns static data used to create an XlaCompiledCpuFunction. 
+ static const tensorflow::XlaCompiledCpuFunction::StaticData& StaticData() { + static XlaCompiledCpuFunction::StaticData* kStaticData = [](){ + XlaCompiledCpuFunction::StaticData* data = + new XlaCompiledCpuFunction::StaticData; + set_static_data_raw_function(data, _xla_AI4CMEMOPCompiledModel_AARCH64_llvm_AI4CMEMOPCompiledModel); + set_static_data_buffer_infos(data, BufferInfos()); + set_static_data_num_buffers(data, kNumBuffers); + set_static_data_result_index_table(data, ResultIndexToBufferIndex()); + set_static_data_num_results(data, kNumResults); + set_static_data_arg_index_table(data, ArgIndexToBufferIndex()); + set_static_data_num_args(data, kNumArgs); + set_static_data_num_variables(data, kNumVariables); + set_static_data_result_index(data, kResultIndex); + set_static_data_arg_shape_infos(data, ArgShapeInfos()); + set_static_data_result_shape_infos(data, ResultShapeInfos()); + set_static_data_arg_names(data, StaticArgNames()); + set_static_data_variable_names(data, StaticVariableNames()); + set_static_data_result_names(data, StaticResultNames()); + set_static_data_program_shape(data, StaticProgramShape()); + set_static_data_hlo_profile_printer_data( + data, StaticHloProfilePrinterData()); + set_static_data_use_xla_runtime(data, false); + + return data; + }(); + return *kStaticData; + } + + AI4CMEMOPCompiledModel(AllocMode alloc_mode = + AllocMode::ARGS_VARIABLES_RESULTS_PROFILES_AND_TEMPS) + : XlaCompiledCpuFunction(StaticData(), alloc_mode) {} + + AI4CMEMOPCompiledModel(const AI4CMEMOPCompiledModel&) = delete; + AI4CMEMOPCompiledModel& operator=(const AI4CMEMOPCompiledModel&) = delete; + + // Arg methods for managing input buffers. Buffers are in row-major order. + // There is a set of methods for each positional argument, with the following + // general form: + // + // void set_argN_data(void* data) + // Sets the buffer of type T for positional argument N. May be called in + // any AllocMode. Must be called before Run to have an effect. 
Must be + // called in AllocMode::RESULTS_PROFILES_AND_TEMPS_ONLY for each positional + // argument, to set the argument buffers. + // + // T* argN_data() + // Returns the buffer of type T for positional argument N. + // + // T& argN(...dim indices...) + // Returns a reference to the value of type T for positional argument N, + // with dim indices specifying which value. No bounds checking is performed + // on dim indices. + + void set_arg0_data(const void* data) { + set_arg_data(0, data); + } + float* arg0_data() { + return static_cast(arg_data(0)); + } + float& arg0(size_t dim0, size_t dim1) { + return (*static_cast( + arg_data(0)))[dim0][dim1]; + } + const float* arg0_data() const { + return static_cast(arg_data(0)); + } + const float& arg0(size_t dim0, size_t dim1) const { + return (*static_cast( + arg_data(0)))[dim0][dim1]; + } + int arg0_size() const { + return 47 * sizeof(float); + } + int arg0_count() const { + return 47; + } + + void set_arg_feed_input_1_data(const void* data) { + set_arg_data(0, data); + } + float* arg_feed_input_1_data() { + return static_cast(arg_data(0)); + } + float& arg_feed_input_1(size_t dim0, size_t dim1) { + return (*static_cast( + arg_data(0)))[dim0][dim1]; + } + const float* arg_feed_input_1_data() const { + return static_cast(arg_data(0)); + } + const float& arg_feed_input_1(size_t dim0, size_t dim1) const { + return (*static_cast( + arg_data(0)))[dim0][dim1]; + } + int arg_feed_input_1_size() const { + return 47 * sizeof(float); + } + int arg_feed_input_1_count() const { + return 47; + } + + // Result methods for managing output buffers. Buffers are in row-major order. + // Must only be called after a successful Run call. There is a set of methods + // for each positional result, with the following general form: + // + // T* resultN_data() + // Returns the buffer of type T for positional result N. + // + // T& resultN(...dim indices...) 
+ // Returns a reference to the value of type T for positional result N, + // with dim indices specifying which value. No bounds checking is performed + // on dim indices. + // + // Unlike the arg methods, there is no set_resultN_data method. The result + // buffers are managed internally, and may change after each call to Run. + + float* result0_data() { + return static_cast(result_data(0)); + } + float& result0(size_t dim0, size_t dim1) { + return (*static_cast( + result_data(0)))[dim0][dim1]; + } + const float* result0_data() const { + return static_cast(result_data(0)); + } + const float& result0(size_t dim0, size_t dim1) const { + return (*static_cast( + result_data(0)))[dim0][dim1]; + } + int result0_size() const { + return 2 * sizeof(float); + } + int result0_count() const { + return 2; + } + + float* result_fetch_output_0_data() { + return static_cast(result_data(0)); + } + float& result_fetch_output_0(size_t dim0, size_t dim1) { + return (*static_cast( + result_data(0)))[dim0][dim1]; + } + const float* result_fetch_output_0_data() const { + return static_cast(result_data(0)); + } + const float& result_fetch_output_0(size_t dim0, size_t dim1) const { + return (*static_cast( + result_data(0)))[dim0][dim1]; + } + int result_fetch_output_0_size() const { + return 2 * sizeof(float); + } + int result_fetch_output_0_count() const { + return 2; + } + + // Methods for managing variable buffers. Buffers are in row-major order. + // + // For read-write variables we generate the following methods: + // + // void set_var_X_data(T* data) + // Sets the buffer for variable X. Must be called before Run if the + // allocation mode is RESULTS_PROFILES_AND_TEMPS_ONLY. + // + // T* var_X_data() + // Returns the buffer of type T for variable X. If the allocation mode is + // RESULTS_PROFILES_AND_TEMPS_ONLY then this buffer is the same as the + // buffer passed to set_var_X_data. + // + // T& var_X(...dim indices...) 
+ // Returns a reference to the value of type T for variable X, + // with dim indices specifying which value. No bounds checking is performed + // on dim indices. + // + // For readonly variables we generate the same set of methods, except that we + // use `const T` instead of `T`. We use `const T` to avoid erasing the + // constness of the buffer passed to `set_var_X_data` but the underlying + // buffer is not const (and thus the const can be safely const-cast'ed away) + // unless `set_var_X_data` is called with a pointer to constant storage. + + private: + // Number of buffers for the compiled computation. + static constexpr size_t kNumBuffers = 12; + + static const ::xla::cpu_function_runtime::BufferInfo* BufferInfos() { + static const ::xla::cpu_function_runtime::BufferInfo + kBufferInfos[kNumBuffers] = { +::xla::cpu_function_runtime::BufferInfo(::xla::cpu_function_runtime::EncodedBufferInfo{16777216ULL, ~0U, ~0U}), +::xla::cpu_function_runtime::BufferInfo(::xla::cpu_function_runtime::EncodedBufferInfo{770048ULL, ~0U, ~0U}), +::xla::cpu_function_runtime::BufferInfo(::xla::cpu_function_runtime::EncodedBufferInfo{212992ULL, ~0U, ~0U}), +::xla::cpu_function_runtime::BufferInfo(::xla::cpu_function_runtime::EncodedBufferInfo{16384ULL, ~0U, ~0U}), +::xla::cpu_function_runtime::BufferInfo(::xla::cpu_function_runtime::EncodedBufferInfo{16384ULL, ~0U, ~0U}), +::xla::cpu_function_runtime::BufferInfo(::xla::cpu_function_runtime::EncodedBufferInfo{754ULL, 0U, ~0U}), +::xla::cpu_function_runtime::BufferInfo(::xla::cpu_function_runtime::EncodedBufferInfo{416ULL, ~0U, ~0U}), +::xla::cpu_function_runtime::BufferInfo(::xla::cpu_function_runtime::EncodedBufferInfo{208ULL, ~0U, ~0U}), +::xla::cpu_function_runtime::BufferInfo(::xla::cpu_function_runtime::EncodedBufferInfo{33ULL, ~0U, 0U}), +::xla::cpu_function_runtime::BufferInfo(::xla::cpu_function_runtime::EncodedBufferInfo{33ULL, ~0U, ~0U}), 
+::xla::cpu_function_runtime::BufferInfo(::xla::cpu_function_runtime::EncodedBufferInfo{32ULL, ~0U, ~0U}), +::xla::cpu_function_runtime::BufferInfo(::xla::cpu_function_runtime::EncodedBufferInfo{32769ULL, ~0U, ~0U}) + }; + return kBufferInfos; + } + + static const ::tensorflow::int32* ResultIndexToBufferIndex() { + static constexpr ::tensorflow::int32 kResultIndexToBufferIndex[kNumResults] = { +8 + }; + return kResultIndexToBufferIndex; + } + + static const ::tensorflow::int32* ArgIndexToBufferIndex() { + static constexpr ::tensorflow::int32 kArgIndexToBufferIndex[kNumArgs] = { +5 + }; + return kArgIndexToBufferIndex; + } + + // The 0-based index of the result tuple in the temporary buffers. + static constexpr size_t kResultIndex = 9; + + // Shapes of the input arguments. + static constexpr int32_t kArg0Shapes[] = { +1, 47 + }; + static const ShapeInfo* ArgShapeInfos() { + static constexpr ShapeInfo kArgShapeInfoTable[kNumArgs] = { +{ kArg0Shapes, 2 }, + }; + return kArgShapeInfoTable; + }; + + // Shapes of the results. + static constexpr int32_t kResult0Shapes[] = { +1, 2 + }; + static const ShapeInfo* ResultShapeInfos() { + static constexpr ShapeInfo kResultShapeInfoTable[kNumResults] = { +{ kResult0Shapes, 2 }, + }; + return kResultShapeInfoTable; + }; + + // Array of names of each positional argument, terminated by nullptr. + static const char** StaticArgNames() { + static const char* kNames[] = {"feed_input_1", nullptr}; + return kNames; + } + + // Array of names of each positional variable, terminated by nullptr. + static const char** StaticVariableNames() { + static const char* kNames[] = {nullptr}; + return kNames; + } + + // Array of names of each positional result, terminated by nullptr. + static const char** StaticResultNames() { + static const char* kNames[] = {"fetch_output_0", nullptr}; + return kNames; + } + + // Shape of the args and results. 
+ static const ::xla::ProgramShapeProto* StaticProgramShape() { + static const ::xla::ProgramShapeProto* kShape = nullptr; + return kShape; + } + + // Metadata that can be used to pretty-print profile counters. + static const ::xla::HloProfilePrinterData* StaticHloProfilePrinterData() { + static const ::xla::HloProfilePrinterData* kHloProfilePrinterData = + nullptr; + return kHloProfilePrinterData; + } +}; + +} // end namespace llvm + +#endif // TFCOMPILE_GENERATED__xla_AI4CMEMOPCompiledModel_AARCH64_llvm_AI4CMEMOPCompiledModel_H_ + +// clang-format on diff --git a/acpo/models/ai4c-memop/AI4CMEMOPCompiledModel-AARCH64.o b/acpo/models/ai4c-memop/AI4CMEMOPCompiledModel-AARCH64.o new file mode 100644 index 0000000000000000000000000000000000000000..eeb6fd1e5d45f7e8a491eaf91025e9904e20e99a Binary files /dev/null and b/acpo/models/ai4c-memop/AI4CMEMOPCompiledModel-AARCH64.o differ diff --git a/acpo/models/ai4c-memop/AI4CMEMOPCompiledModel-AARCH64_metadata.o b/acpo/models/ai4c-memop/AI4CMEMOPCompiledModel-AARCH64_metadata.o new file mode 100644 index 0000000000000000000000000000000000000000..cc02d5395c9cbe40bd5f9160f354848878b08721 Binary files /dev/null and b/acpo/models/ai4c-memop/AI4CMEMOPCompiledModel-AARCH64_metadata.o differ diff --git a/acpo/models/ai4c-memop/saved_model.pb b/acpo/models/ai4c-memop/saved_model.pb new file mode 100644 index 0000000000000000000000000000000000000000..44df523eeeca293ce283457f097b9692732d9c9a Binary files /dev/null and b/acpo/models/ai4c-memop/saved_model.pb differ diff --git a/acpo/models/ai4c-memop/sc.pkl b/acpo/models/ai4c-memop/sc.pkl new file mode 100644 index 0000000000000000000000000000000000000000..602ca9a9cab4375654ed6941e67c7a268c129b5c Binary files /dev/null and b/acpo/models/ai4c-memop/sc.pkl differ diff --git a/acpo/models/ai4c-memop/variables/variables.data-00000-of-00001 b/acpo/models/ai4c-memop/variables/variables.data-00000-of-00001 new file mode 100644 index 
0000000000000000000000000000000000000000..84f6bd73d7523bd92d84365ab9b26974b423c9c3 Binary files /dev/null and b/acpo/models/ai4c-memop/variables/variables.data-00000-of-00001 differ diff --git a/acpo/models/ai4c-memop/variables/variables.index b/acpo/models/ai4c-memop/variables/variables.index new file mode 100644 index 0000000000000000000000000000000000000000..dcdb372b626ab6efee8c1042f66bb1756773e96b Binary files /dev/null and b/acpo/models/ai4c-memop/variables/variables.index differ diff --git a/acpo/models/fi-dummy/output_spec.json b/acpo/models/fi-dummy/output_spec.json new file mode 100644 index 0000000000000000000000000000000000000000..1636185d6f73a63a5395b98f692df7a293120a8a --- /dev/null +++ b/acpo/models/fi-dummy/output_spec.json @@ -0,0 +1,14 @@ + +[ + { + "logging_name": "inlining_decision", + "tensor_spec": { + "name": "PartitionedCall", + "port": 0, + "type": "float64_t", + "shape": [ + 1 + ] + } + } +] diff --git a/acpo/models/fi-dummy/saved_model.pb b/acpo/models/fi-dummy/saved_model.pb new file mode 100644 index 0000000000000000000000000000000000000000..fc47e10babe8e4eb25fe78752e716281e1f7f82f Binary files /dev/null and b/acpo/models/fi-dummy/saved_model.pb differ diff --git a/acpo/models/fi-dummy/variables/variables.data-00000-of-00001 b/acpo/models/fi-dummy/variables/variables.data-00000-of-00001 new file mode 100644 index 0000000000000000000000000000000000000000..4b27e1a41bd8cd41c633b7f50d572ad946e92d79 Binary files /dev/null and b/acpo/models/fi-dummy/variables/variables.data-00000-of-00001 differ diff --git a/acpo/models/fi-dummy/variables/variables.index b/acpo/models/fi-dummy/variables/variables.index new file mode 100644 index 0000000000000000000000000000000000000000..333057c66e88065a76dd0f39edd3fcfe208fc138 Binary files /dev/null and b/acpo/models/fi-dummy/variables/variables.index differ