From 0c9415beb117fa7fe3c7feb85294c5e218801780 Mon Sep 17 00:00:00 2001 From: Olia Date: Mon, 11 Aug 2025 12:46:25 +0200 Subject: [PATCH] Time Profiler for Sofie --- tmva/sofie/CMakeLists.txt | 2 + tmva/sofie/inc/TMVA/RModel.hxx | 8 +- tmva/sofie/inc/TMVA/RModelProfiler.hxx | 42 +++++ tmva/sofie/inc/TMVA/RModel_Base.hxx | 1 + tmva/sofie/inc/TMVA/ROperator.hxx | 3 + tmva/sofie/src/RModel.cxx | 49 +++--- tmva/sofie/src/RModelProfiler.cxx | 161 +++++++++++++++++++ tmva/sofie_parsers/src/RModelParser_ONNX.cxx | 9 +- tutorials/machine_learning/TMVA_SOFIE_ONNX.C | 2 +- 9 files changed, 253 insertions(+), 24 deletions(-) create mode 100644 tmva/sofie/inc/TMVA/RModelProfiler.hxx create mode 100644 tmva/sofie/src/RModelProfiler.cxx diff --git a/tmva/sofie/CMakeLists.txt b/tmva/sofie/CMakeLists.txt index c807d1b7b8c27..f56d2350ecadd 100644 --- a/tmva/sofie/CMakeLists.txt +++ b/tmva/sofie/CMakeLists.txt @@ -22,6 +22,7 @@ ROOT_STANDARD_LIBRARY_PACKAGE(ROOTTMVASofie TMVA/OperatorList.hxx TMVA/RModel_Base.hxx TMVA/RModel.hxx + TMVA/RModelProfiler.hxx TMVA/ROperator.hxx TMVA/ROperator_BasicUnary.hxx TMVA/ROperator_BasicBinary.hxx @@ -77,6 +78,7 @@ ROOT_STANDARD_LIBRARY_PACKAGE(ROOTTMVASofie SOURCES src/RModel_Base.cxx src/RModel.cxx + src/RModelProfiler.cxx src/RModel_GNN.cxx src/RModel_GraphIndependent.cxx src/RFunction.cxx diff --git a/tmva/sofie/inc/TMVA/RModel.hxx b/tmva/sofie/inc/TMVA/RModel.hxx index 80dcc9a9c45d5..f811ca3512c85 100644 --- a/tmva/sofie/inc/TMVA/RModel.hxx +++ b/tmva/sofie/inc/TMVA/RModel.hxx @@ -11,12 +11,16 @@ namespace SOFIE { class RModel final : public RModel_Base { + friend class RModelProfiler; + private: bool fIsInitialized = false; bool fIsSubGraph = false; int fVerbose = 0; int fBatchSize = -1; long fReadPos = 0; // reading file position + std::string fProfilerGC = ""; + bool fProfile = false; OptimizationLevel fOptimizationLevel = OptimizationLevel::kExtended; @@ -148,8 +152,8 @@ public: void Initialize(int batchSize = -1, bool verbose = false); void Initialize(const std::map & inputParams, bool verbose = false); - - void Generate(std::underlying_type_t options, int batchSize = -1, long pos = 0, bool verbose = false); + + void Generate(std::underlying_type_t options, int batchSize = -1, long pos = 0, bool verbose = false); void Generate(Options options = Options::kDefault, int batchSize = -1, int pos = 0, bool verbose = false) { Generate(static_cast>(options), batchSize, pos, verbose); diff --git a/tmva/sofie/inc/TMVA/RModelProfiler.hxx b/tmva/sofie/inc/TMVA/RModelProfiler.hxx new file mode 100644 index 0000000000000..fd9c8c7d0267d --- /dev/null +++ b/tmva/sofie/inc/TMVA/RModelProfiler.hxx @@ -0,0 +1,42 @@ +#ifndef TMVA_SOFIE_RMODELPROFILER +#define TMVA_SOFIE_RMODELPROFILER + +#include "TMVA/RModel.hxx" + +namespace TMVA { +namespace Experimental { +namespace SOFIE { + +/// \class RModelProfiler +/// \brief A helper class to generate profiled inference code for an RModel. +/// +/// This class instruments the generated C++ code to measure the execution +/// time of each operator. It is invoked when the RModel::Generate is called +/// with the Options::kProfile flag. +class RModelProfiler { +private: + RModel &fModel; + + void GenerateUtilityFunctions(); + +public: + // The profiler must be constructed with a model to work on. + RModelProfiler() = delete; + RModelProfiler(RModel &model); + ~RModelProfiler() = default; + + // There is no point in copying or moving an RModelProfiler + RModelProfiler(const RModelProfiler &other) = delete; + RModelProfiler(RModelProfiler &&other) = delete; + RModelProfiler &operator=(const RModelProfiler &other) = delete; + RModelProfiler &operator=(RModelProfiler &&other) = delete; + + // Main function to generate the profiled code. + void Generate(); +}; + +} // namespace SOFIE +} // namespace Experimental +} // namespace TMVA + +#endif // TMVA_SOFIE_RMODELPROFILER diff --git a/tmva/sofie/inc/TMVA/RModel_Base.hxx b/tmva/sofie/inc/TMVA/RModel_Base.hxx index 2cbcc6cc8ea41..2ab5dacaac57f 100644 --- a/tmva/sofie/inc/TMVA/RModel_Base.hxx +++ b/tmva/sofie/inc/TMVA/RModel_Base.hxx @@ -26,6 +26,7 @@ enum class Options { kRootBinaryWeightFile = 0x4, kGNN = 0x8, kGNNComponent = 0x10, + kProfile = 0x20, }; // Optimization levels inspired by ONNXRuntime. diff --git a/tmva/sofie/inc/TMVA/ROperator.hxx b/tmva/sofie/inc/TMVA/ROperator.hxx index f0afd9c4374c1..200cd3f2976fe 100644 --- a/tmva/sofie/inc/TMVA/ROperator.hxx +++ b/tmva/sofie/inc/TMVA/ROperator.hxx @@ -37,6 +37,9 @@ public: //virtual void Forward_blas() = 0; virtual ~ROperator(){} + std::string name = "UnnamedOperator"; + const std::string &GetOperatorName() { return name; }; + protected: const std::string SP = " "; ///< space used to correctly indent the generated C++ code diff --git a/tmva/sofie/src/RModel.cxx b/tmva/sofie/src/RModel.cxx index 57d1630f8c619..e7f40e2479333 100644 --- a/tmva/sofie/src/RModel.cxx +++ b/tmva/sofie/src/RModel.cxx @@ -9,6 +9,7 @@ #endif #include "TMVA/RModel.hxx" +#include "TMVA/RModelProfiler.hxx" #include "TMVA/SOFIE_common.hxx" namespace TMVA { @@ -850,7 +851,7 @@ void RModel::GenerateSessionCode() CheckAndFlushIntermediateMemory(fOperators[op_idx]->GetOpInputTensors(), op_idx); } - // to check remaining unused fragments after memory allocation (lesser the better) + // to check remaining unused fragments after memory allocation (lesser the better) // for (const auto &it: fIntermediateMemoryInfo.available_stack){ // std::cout<<"chunk_idx: "<GenerateSessionMembersCode(opName); + fGC += fOperators[id]->GenerateSessionMembersCode(opName); } fGC += "\n"; // here add initialization and reading of weight tensors @@ -930,23 +931,28 @@ void RModel::GenerateSessionCode() fGC += "}\n\n"; } - fGC += doInferSignature + "{\n"; - fGC += "\n"; + if (fProfile) { + RModelProfiler profiler(*this); + profiler.Generate(); + fGC += fProfilerGC; + } else { + fGC += doInferSignature + "{\n"; + fGC += "\n"; - // generate the inference code - if (fVerbose) - std::cout << "Generating main inference code for " << fName << std::endl; + // generate the inference code + if (fVerbose) + std::cout << "Generating main inference code for " << fName << std::endl; - if (fOutputTensorNames.size() == 0) - throw std::runtime_error("TMVA-SOFIE: output size=0 are not supported"); + if (fOutputTensorNames.size() == 0) + throw std::runtime_error("TMVA-SOFIE: output size=0 are not supported"); - for (size_t op_idx = 0; op_idx < fOperators.size(); ++op_idx) { - if (fVerbose) + for (size_t op_idx = 0; op_idx < fOperators.size(); ++op_idx) { + if (fVerbose) std::cout << "Generating code for operator .... " << op_idx << std::endl; - fGC += (fOperators[op_idx]->Generate(std::to_string(op_idx))); - } + fGC += (fOperators[op_idx]->Generate(std::to_string(op_idx))); + } - fGC += SP + "using TMVA::Experimental::SOFIE::UTILITY::FillOutput;\n\n"; + fGC += SP + "using TMVA::Experimental::SOFIE::UTILITY::FillOutput;\n\n"; for (std::string const &name : fOutputTensorNames) { // need to check is size is the same (don't want to return a vector with @@ -957,7 +963,8 @@ void RModel::GenerateSessionCode() fGC += SP + "FillOutput(tensor_" + name + ", output_tensor_" + name + ", " + n + ");\n"; } - fGC += "}\n\n"; + fGC += "}\n\n"; + } // generate the inference overload that returns an output struct GenerateOutput(); @@ -970,9 +977,11 @@ void RModel::GenerateSessionCode() void RModel::Generate(std::underlying_type_t options, int batchSize, long pos, bool verbose) { + bool profile = (options & static_cast>(Options::kProfile)); fVerbose = verbose; fBatchSize = batchSize; fReadPos = pos; + fProfile = profile; // session flag is used in operator initialize if (static_cast>(Options::kNoSession) & options) { @@ -992,9 +1001,9 @@ void RModel::Generate(std::underlying_type_t options, int batchSize, lo "TMVA-SOFIE: RModel::Generate: cannot use a separate weight file without generating a Session class"); } - if (static_cast>(Options::kGNN) & options) + if (static_cast>(Options::kGNN) & options) fIsGNN = true; - if (static_cast>(Options::kGNNComponent) & options) + if (static_cast>(Options::kGNNComponent) & options) fIsGNNComponent = true; // initialize the model including all operators and sub-graphs @@ -1008,13 +1017,13 @@ void RModel::Generate(std::underlying_type_t options, int batchSize, lo // generate first code for the subgraphs for (auto &graph : fSubGraphs) { - if (fVerbose) + if (fVerbose) std::cout << "generate session code for subgraph " << graph->fName << std::endl; graph->GenerateSessionCode(); fGC += graph->fGC; } - if (fVerbose) + if (fVerbose) std::cout << "generate Main session code - model " << fName << std::endl; // generate main session code diff --git a/tmva/sofie/src/RModelProfiler.cxx b/tmva/sofie/src/RModelProfiler.cxx new file mode 100644 index 0000000000000..76386e6de817d --- /dev/null +++ b/tmva/sofie/src/RModelProfiler.cxx @@ -0,0 +1,161 @@ +#include "TMVA/RModelProfiler.hxx" +#include "TMVA/SOFIE_common.hxx" + +namespace TMVA { +namespace Experimental { +namespace SOFIE { + +// The constructor now just registers the necessary C++ libraries. +RModelProfiler::RModelProfiler(RModel &model) : fModel(model) +{ + fModel.AddNeededStdLib("chrono"); // for timing operators + fModel.AddNeededStdLib("vector"); // for storing profiling results + fModel.AddNeededStdLib("string"); // for operator names + fModel.AddNeededStdLib("map"); // for the results map + fModel.AddNeededStdLib("iostream"); // for printing results + fModel.AddNeededStdLib("iomanip"); // for printing results +} + +// This function generates the helper functions inside the Session struct. +void RModelProfiler::GenerateUtilityFunctions() +{ + auto &gc = fModel.fProfilerGC; + + // Generate PrintProfilingResults function + gc += " void PrintProfilingResults() const {\n"; + gc += " if (fProfilingResults.empty()) {\n"; + gc += " std::cout << \"No profiling results to display.\" << std::endl;\n"; + gc += " return;\n"; + gc += " }\n"; + gc += "\n"; + gc += " std::cout << \"\\n\" << std::string(50, '=') << std::endl;\n"; + gc += " std::cout << \" AVERAGE PROFILING RESULTS\" << std::endl;\n"; + gc += " std::cout << std::string(50, '=') << std::endl;\n"; + gc += " for (const auto& op : fProfilingResults) {\n"; + gc += " double sum = 0.0;\n"; + gc += " for (double time : op.second) {\n"; + gc += " sum += time;\n"; + gc += " }\n"; + gc += " double average = sum / op.second.size();\n"; + gc += " std::cout << \" \" << std::left << std::setw(20) << op.first\n"; + gc += " << \": \" << std::fixed << std::setprecision(6) << average << \" us\"\n"; + gc += " << \" (over \" << op.second.size() << \" runs)\" << std::endl;\n"; + gc += " }\n"; + gc += " std::cout << std::string(50, '=') << \"\\n\" << std::endl;\n"; + gc += " }\n"; + gc += "\n"; + + // Generate ResetProfilingResults function + gc += " void ResetProfilingResults() {\n"; + gc += " fProfilingResults.clear();\n"; + gc += " }\n"; + gc += "\n"; + + // Generate GetOpAvgTime function + gc += " std::map GetOpAvgTime() const {\n"; + gc += " if (fProfilingResults.empty()) {\n"; + gc += " return {};\n"; + gc += " }\n"; + gc += "\n"; + gc += " std::map avg;\n"; + gc += " for (const auto& op : fProfilingResults) {\n"; + gc += " double mean = 0.0;\n"; + gc += " for (double time : op.second) {\n"; + gc += " mean += time;\n"; + gc += " }\n"; + gc += " mean /= op.second.size();\n"; + gc += " avg[op.first] = mean;\n"; + gc += " }\n"; + gc += "\n"; + gc += " return avg;\n"; + gc += " }\n"; + gc += "\n"; + + // Generate GetOpVariance function + gc += " std::map GetOpVariance() const {\n"; + gc += " if (fProfilingResults.empty()) {\n"; + gc += " return {};\n"; + gc += " }\n"; + gc += "\n"; + gc += " std::map variance;\n"; + gc += " for (const auto& op : fProfilingResults) {\n"; + gc += " // Var[X] = E[X^2] - E[X]^2\n"; + gc += " double mean = 0.0, mean2 = 0.0;\n"; + gc += " for (double time : op.second) {\n"; + gc += " mean += time;\n"; + gc += " mean2 += time * time;\n"; + gc += " }\n"; + gc += " mean /= op.second.size();\n"; + gc += " mean2 /= op.second.size();\n"; + gc += " variance[op.first] = mean2 - mean * mean;\n"; + gc += " }\n"; + gc += "\n"; + gc += " return variance;\n"; + gc += " }\n"; +} + +// Main generation function for the profiler. +void RModelProfiler::Generate() +{ + // Clear the profiler's code string to start fresh. + fModel.fProfilerGC.clear(); + auto &gc = fModel.fProfilerGC; + + // 1. Add the data member to the Session struct to store results. + gc += "public:\n"; + gc += " // Maps an operator name to a vector of its execution times (in microseconds).\n"; + gc += " std::map> fProfilingResults;\n\n"; + + // 2. Generate and add the utility functions like PrintProfilingResults. + GenerateUtilityFunctions(); + + // 3. Generate the signature for the profiled doInfer method. + std::string doInferSignature = fModel.GenerateInferSignature(); + if (!doInferSignature.empty()) doInferSignature += ", "; + for (auto const &name : fModel.GetOutputTensorNames()) { + doInferSignature += " std::vector<" + ConvertTypeToString(fModel.GetTensorType(name)) + "> &output_tensor_" + name + ","; + } + if (!fModel.GetOutputTensorNames().empty()) { + doInferSignature.back() = ' '; + } + gc += "void doInfer(" + doInferSignature + ") {\n"; + + // 4. Generate the body of the doInfer method with timing instrumentation. + gc += " // Timer variable for profiling\n"; + gc += " std::chrono::steady_clock::time_point tp_start, tp_overall_start;\n\n"; + gc += " tp_overall_start = std::chrono::steady_clock::now();\n\n"; + + for (size_t op_idx = 0; op_idx < fModel.fOperators.size(); ++op_idx) { + const auto& op = fModel.fOperators[op_idx]; + gc += " // -- Profiling for operator " + op->name + " --\n"; + gc += " tp_start = std::chrono::steady_clock::now();\n\n"; + + // Add the actual operator inference code + gc += op->Generate(std::to_string(op_idx)); + + // Add the code to stop the timer and store the result + gc += "\n fProfilingResults[\"" + op->name + "\"].push_back(\n"; + gc += " std::chrono::duration_cast>(\n"; + gc += " std::chrono::steady_clock::now() - tp_start).count());\n\n"; + } + + // 5. Generate the code to fill the output tensors. + gc += " using TMVA::Experimental::SOFIE::UTILITY::FillOutput;\n\n"; + for (std::string const &name : fModel.GetOutputTensorNames()) { + bool isIntermediate = fModel.fIntermediateTensorInfos.count(name) > 0; + std::string n = isIntermediate ? std::to_string(ConvertShapeToLength(fModel.GetTensorShape(name))) + : ConvertDynamicShapeToLength(fModel.GetDynamicTensorShape(name)); + gc += " FillOutput(tensor_" + name + ", output_tensor_" + name + ", " + n + ");\n"; + } + + gc += "\n // -- Record overall inference time --\n"; + gc += " fProfilingResults[\"Overall_Time\"].push_back(\n"; + gc += " std::chrono::duration_cast>(\n"; + gc += " std::chrono::steady_clock::now() - tp_overall_start).count());\n"; + + gc += "}\n\n"; // End of doInfer function +} + +} // namespace SOFIE +} // namespace Experimental +} // namespace TMVA diff --git a/tmva/sofie_parsers/src/RModelParser_ONNX.cxx b/tmva/sofie_parsers/src/RModelParser_ONNX.cxx index 7b4ade2b6bc09..4903c8d1c6511 100644 --- a/tmva/sofie_parsers/src/RModelParser_ONNX.cxx +++ b/tmva/sofie_parsers/src/RModelParser_ONNX.cxx @@ -731,7 +731,8 @@ void RModelParser_ONNX::ParseONNXGraph(RModel & rmodel, const onnx::GraphProto & std::cout << "\t" << i << " " << nodesOrder[i] << " parsing operator " << op_type << std::endl; } - std::unique_ptr op = ParseOperator(i, graph, nodesOrder, nodesChildren[i]); + std::unique_ptr op = ParseOperator(i, graph, nodesOrder, nodesChildren[nodesOrder[i]]); + if (!op) { if (verbose) { std::cout << "\t\tskipping operator since it is fused with previous one" << std::endl; @@ -739,6 +740,12 @@ void RModelParser_ONNX::ParseONNXGraph(RModel & rmodel, const onnx::GraphProto & // for skipping the fused nodes like Add after MatMul continue; } + const auto &nodeproto = graph.node(nodesOrder[i]); + op->name = nodeproto.name(); + if (op->name.empty()) { + op->name = op_type + "_" + std::to_string(i); + } + rmodel.AddOperator(std::move(op), node_order_exec++); } diff --git a/tutorials/machine_learning/TMVA_SOFIE_ONNX.C b/tutorials/machine_learning/TMVA_SOFIE_ONNX.C index 8c192789e1210..878167db8c791 100644 --- a/tutorials/machine_learning/TMVA_SOFIE_ONNX.C +++ b/tutorials/machine_learning/TMVA_SOFIE_ONNX.C @@ -19,7 +19,7 @@ void TMVA_SOFIE_ONNX(std::string inputFile = ""){ SOFIE::RModel model = parser.Parse(inputFile, true); //Generating inference code - model.Generate(); + model.Generate(SOFIE::Options::kProfile); // write the code in a file (by default Linear_16.hxx and Linear_16.dat model.OutputGenerated();