From 0c9415beb117fa7fe3c7feb85294c5e218801780 Mon Sep 17 00:00:00 2001
From: Olia <olia@osirikov-1.dyndns.cern.ch>
Date: Mon, 11 Aug 2025 12:46:25 +0200
Subject: [PATCH] Time Profiler for Sofie

---
 tmva/sofie/CMakeLists.txt                    |   2 +
 tmva/sofie/inc/TMVA/RModel.hxx               |   8 +-
 tmva/sofie/inc/TMVA/RModelProfiler.hxx       |  42 +++++
 tmva/sofie/inc/TMVA/RModel_Base.hxx          |   1 +
 tmva/sofie/inc/TMVA/ROperator.hxx            |   3 +
 tmva/sofie/src/RModel.cxx                    |  49 +++---
 tmva/sofie/src/RModelProfiler.cxx            | 161 +++++++++++++++++++
 tmva/sofie_parsers/src/RModelParser_ONNX.cxx |   9 +-
 tutorials/machine_learning/TMVA_SOFIE_ONNX.C |   2 +-
 9 files changed, 253 insertions(+), 24 deletions(-)
 create mode 100644 tmva/sofie/inc/TMVA/RModelProfiler.hxx
 create mode 100644 tmva/sofie/src/RModelProfiler.cxx
diff --git a/tmva/sofie/CMakeLists.txt b/tmva/sofie/CMakeLists.txt
index c807d1b7b8c27..f56d2350ecadd 100644
--- a/tmva/sofie/CMakeLists.txt
+++ b/tmva/sofie/CMakeLists.txt
@@ -22,6 +22,7 @@ ROOT_STANDARD_LIBRARY_PACKAGE(ROOTTMVASofie
    TMVA/OperatorList.hxx
    TMVA/RModel_Base.hxx
    TMVA/RModel.hxx
+   TMVA/RModelProfiler.hxx
    TMVA/ROperator.hxx
    TMVA/ROperator_BasicUnary.hxx
    TMVA/ROperator_BasicBinary.hxx
@@ -77,6 +78,7 @@ ROOT_STANDARD_LIBRARY_PACKAGE(ROOTTMVASofie
   SOURCES
     src/RModel_Base.cxx
     src/RModel.cxx
+    src/RModelProfiler.cxx
     src/RModel_GNN.cxx
     src/RModel_GraphIndependent.cxx
     src/RFunction.cxx
diff --git a/tmva/sofie/inc/TMVA/RModel.hxx b/tmva/sofie/inc/TMVA/RModel.hxx
index 80dcc9a9c45d5..f811ca3512c85 100644
--- a/tmva/sofie/inc/TMVA/RModel.hxx
+++ b/tmva/sofie/inc/TMVA/RModel.hxx
@@ -11,12 +11,16 @@ namespace SOFIE {
 
 class RModel final : public RModel_Base {
 
+   friend class RModelProfiler;
+
 private:
    bool fIsInitialized = false;
    bool fIsSubGraph = false;
    int fVerbose = 0;
    int fBatchSize = -1;
    long fReadPos = 0;  // reading file position
+   std::string fProfilerGC = "";
+   bool fProfile = false;
 
    OptimizationLevel fOptimizationLevel = OptimizationLevel::kExtended;
 
@@ -148,8 +152,8 @@ public:
 
    void Initialize(int batchSize = -1, bool verbose = false);
    void Initialize(const std::map<std::string,size_t> & inputParams, bool verbose = false);
-
-   void Generate(std::underlying_type_t<Options> options, int batchSize = -1, long pos = 0, bool verbose = false);
+   
+    void Generate(std::underlying_type_t<Options> options, int batchSize = -1, long pos = 0, bool verbose = false);
    void Generate(Options options = Options::kDefault, int batchSize = -1, int pos = 0, bool verbose = false)
    {
       Generate(static_cast<std::underlying_type_t<Options>>(options), batchSize, pos, verbose);
diff --git a/tmva/sofie/inc/TMVA/RModelProfiler.hxx b/tmva/sofie/inc/TMVA/RModelProfiler.hxx
new file mode 100644
index 0000000000000..fd9c8c7d0267d
--- /dev/null
+++ b/tmva/sofie/inc/TMVA/RModelProfiler.hxx
@@ -0,0 +1,42 @@
+#ifndef TMVA_SOFIE_RMODELPROFILER
+#define TMVA_SOFIE_RMODELPROFILER
+
+#include "TMVA/RModel.hxx"
+
+namespace TMVA {
+namespace Experimental {
+namespace SOFIE {
+
+/// \class RModelProfiler
+/// \brief A helper class to generate profiled inference code for an RModel.
+///
+/// This class instruments the generated C++ code to measure the execution
+/// time of each operator. It is invoked when the RModel::Generate is called
+/// with the Options::kProfile flag. 
+class RModelProfiler {
+private:
+   RModel &fModel;
+   
+   void GenerateUtilityFunctions();
+
+public:
+   // The profiler must be constructed with a model to work on.
+   RModelProfiler() = delete;
+   RModelProfiler(RModel &model);
+   ~RModelProfiler() = default;
+   
+   // There is no point in copying or moving an RModelProfiler
+   RModelProfiler(const RModelProfiler &other) = delete;
+   RModelProfiler(RModelProfiler &&other) = delete;
+   RModelProfiler &operator=(const RModelProfiler &other) = delete;
+   RModelProfiler &operator=(RModelProfiler &&other) = delete;
+   
+   // Main function to generate the profiled code.
+   void Generate();
+};
+
+} // namespace SOFIE
+} // namespace Experimental
+} // namespace TMVA
+
+#endif // TMVA_SOFIE_RMODELPROFILER
diff --git a/tmva/sofie/inc/TMVA/RModel_Base.hxx b/tmva/sofie/inc/TMVA/RModel_Base.hxx
index 2cbcc6cc8ea41..2ab5dacaac57f 100644
--- a/tmva/sofie/inc/TMVA/RModel_Base.hxx
+++ b/tmva/sofie/inc/TMVA/RModel_Base.hxx
@@ -26,6 +26,7 @@ enum class Options {
    kRootBinaryWeightFile = 0x4,
    kGNN = 0x8,
    kGNNComponent = 0x10,
+   kProfile = 0x20,
 };
 
 // Optimization levels inspired by ONNXRuntime.
diff --git a/tmva/sofie/inc/TMVA/ROperator.hxx b/tmva/sofie/inc/TMVA/ROperator.hxx
index f0afd9c4374c1..200cd3f2976fe 100644
--- a/tmva/sofie/inc/TMVA/ROperator.hxx
+++ b/tmva/sofie/inc/TMVA/ROperator.hxx
@@ -37,6 +37,9 @@ public:
    //virtual void Forward_blas() = 0;
    virtual ~ROperator(){}
 
+   std::string name = "UnnamedOperator";
+   const std::string &GetOperatorName() { return name; };
+
 protected:
 
    const std::string SP = "   ";    ///< space used to correctly indent the generated C++ code
diff --git a/tmva/sofie/src/RModel.cxx b/tmva/sofie/src/RModel.cxx
index 57d1630f8c619..e7f40e2479333 100644
--- a/tmva/sofie/src/RModel.cxx
+++ b/tmva/sofie/src/RModel.cxx
@@ -9,6 +9,7 @@
 #endif
 
 #include "TMVA/RModel.hxx"
+#include "TMVA/RModelProfiler.hxx"
 #include "TMVA/SOFIE_common.hxx"
 
 namespace TMVA {
@@ -850,7 +851,7 @@ void RModel::GenerateSessionCode()
          CheckAndFlushIntermediateMemory(fOperators[op_idx]->GetOpInputTensors(), op_idx);
       }
 
-      // to check remaining unused fragments after memory allocation (lesser the better)
+  // to check remaining unused fragments after memory allocation (lesser the better)
       // for (const auto &it: fIntermediateMemoryInfo.available_stack){
       //    std::cout<<"chunk_idx: "<<it.first<<", chunk_size: "<<it.second<<"\n";
       // }
@@ -878,13 +879,13 @@ void RModel::GenerateSessionCode()
    // Generate code for Session constructor
    if (fUseSession) {
       std::string sessionName = "Session";
-      if (fIsSubGraph)
+      if (fIsSubGraph) 
          sessionName += "_" + fName;
       // add here specific operator code that needs to define session data members
       fGC += "\n";
       for (size_t id = 0; id < fOperators.size(); id++) {
          std::string opName = std::to_string(id);
-         fGC += fOperators[id]->GenerateSessionMembersCode(opName);
+         fGC += fOperators[id]->GenerateSessionMembersCode(opName);        
       }
       fGC += "\n";
       // here add initialization and reading of weight tensors
@@ -930,23 +931,28 @@ void RModel::GenerateSessionCode()
       fGC += "}\n\n";
    }
 
-   fGC += doInferSignature + "{\n";
-   fGC += "\n";
+   if (fProfile) {
+      RModelProfiler profiler(*this);
+      profiler.Generate();
+      fGC += fProfilerGC; 
+   } else {
+      fGC += doInferSignature + "{\n";
+      fGC += "\n";
 
-   // generate the inference code
-   if (fVerbose)
-      std::cout << "Generating main inference code for " << fName << std::endl;
+      // generate the inference code
+      if (fVerbose)
+         std::cout << "Generating main inference code for " << fName << std::endl;
 
-   if (fOutputTensorNames.size() == 0)
-      throw std::runtime_error("TMVA-SOFIE: output size=0 are not supported");
+      if (fOutputTensorNames.size() == 0)
+         throw std::runtime_error("TMVA-SOFIE: output size=0 are not supported");
 
-   for (size_t op_idx = 0; op_idx < fOperators.size(); ++op_idx) {
-      if (fVerbose)
+      for (size_t op_idx = 0; op_idx < fOperators.size(); ++op_idx) {
+         if (fVerbose)
          std::cout << "Generating code for operator .... " << op_idx << std::endl;
-      fGC += (fOperators[op_idx]->Generate(std::to_string(op_idx)));
-   }
+         fGC += (fOperators[op_idx]->Generate(std::to_string(op_idx)));
+      }
 
-   fGC += SP + "using TMVA::Experimental::SOFIE::UTILITY::FillOutput;\n\n";
+      fGC += SP + "using TMVA::Experimental::SOFIE::UTILITY::FillOutput;\n\n";
 
    for (std::string const &name : fOutputTensorNames) {
       // need to check is size is the same (don't want to return a vector with
@@ -957,7 +963,8 @@ void RModel::GenerateSessionCode()
       fGC += SP + "FillOutput(tensor_" + name + ", output_tensor_" + name + ", " + n + ");\n";
    }
 
-   fGC += "}\n\n";
+      fGC += "}\n\n";
+   }
 
    // generate the inference overload that returns an output struct
    GenerateOutput();
@@ -970,9 +977,11 @@ void RModel::GenerateSessionCode()
 
 void RModel::Generate(std::underlying_type_t<Options> options, int batchSize, long pos, bool verbose)
 {
+   bool profile = (options & static_cast<std::underlying_type_t<Options>>(Options::kProfile));
    fVerbose = verbose;
    fBatchSize = batchSize;
    fReadPos = pos;
+   fProfile = profile;
 
    // session flag is used in operator initialize
    if (static_cast<std::underlying_type_t<Options>>(Options::kNoSession) & options) {
@@ -992,9 +1001,9 @@ void RModel::Generate(std::underlying_type_t<Options> options, int batchSize, lo
          "TMVA-SOFIE: RModel::Generate: cannot use a separate weight file without generating a Session class");
    }
 
-   if (static_cast<std::underlying_type_t<Options>>(Options::kGNN) & options)
+   if (static_cast<std::underlying_type_t<Options>>(Options::kGNN) & options) 
       fIsGNN = true;
-   if (static_cast<std::underlying_type_t<Options>>(Options::kGNNComponent) & options)
+   if (static_cast<std::underlying_type_t<Options>>(Options::kGNNComponent) & options) 
       fIsGNNComponent = true;
 
    // initialize the model including all operators and sub-graphs
@@ -1008,13 +1017,13 @@ void RModel::Generate(std::underlying_type_t<Options> options, int batchSize, lo
 
    // generate first code for the subgraphs
    for (auto &graph : fSubGraphs) {
-      if (fVerbose)
+      if (fVerbose) 
          std::cout << "generate session code for subgraph " << graph->fName << std::endl;
       graph->GenerateSessionCode();
       fGC += graph->fGC;
    }
 
-   if (fVerbose)
+   if (fVerbose) 
       std::cout << "generate Main session code - model  " << fName << std::endl;
 
    // generate main session code
diff --git a/tmva/sofie/src/RModelProfiler.cxx b/tmva/sofie/src/RModelProfiler.cxx
new file mode 100644
index 0000000000000..76386e6de817d
--- /dev/null
+++ b/tmva/sofie/src/RModelProfiler.cxx
@@ -0,0 +1,161 @@
+#include "TMVA/RModelProfiler.hxx"
+#include "TMVA/SOFIE_common.hxx"
+
+namespace TMVA {
+namespace Experimental {
+namespace SOFIE {
+
+// The constructor now just registers the necessary C++ libraries.
+RModelProfiler::RModelProfiler(RModel &model) : fModel(model)
+{
+   fModel.AddNeededStdLib("chrono");      // for timing operators
+   fModel.AddNeededStdLib("vector");      // for storing profiling results
+   fModel.AddNeededStdLib("string");      // for operator names
+   fModel.AddNeededStdLib("map");         // for the results map
+   fModel.AddNeededStdLib("iostream");    // for printing results
+   fModel.AddNeededStdLib("iomanip");     // for printing results
+}
+
+// This function generates the helper functions inside the Session struct.
+void RModelProfiler::GenerateUtilityFunctions()
+{
+   auto &gc = fModel.fProfilerGC;
+
+   // Generate PrintProfilingResults function
+   gc += "   void PrintProfilingResults() const {\n";
+   gc += "      if (fProfilingResults.empty()) {\n";
+   gc += "         std::cout << \"No profiling results to display.\" << std::endl;\n";
+   gc += "         return;\n";
+   gc += "      }\n";
+   gc += "\n";
+   gc += "      std::cout << \"\\n\" << std::string(50, '=') << std::endl;\n";
+   gc += "      std::cout << \"         AVERAGE PROFILING RESULTS\" << std::endl;\n";
+   gc += "      std::cout << std::string(50, '=') << std::endl;\n";
+   gc += "      for (const auto& op : fProfilingResults) {\n";
+   gc += "         double sum = 0.0;\n";
+   gc += "         for (double time : op.second) {\n";
+   gc += "            sum += time;\n";
+   gc += "         }\n";
+   gc += "         double average = sum / op.second.size();\n";
+   gc += "         std::cout << \"  \" << std::left << std::setw(20) << op.first\n";
+   gc += "                   << \": \" << std::fixed << std::setprecision(6) << average << \" us\"\n";
+   gc += "                   << \"  (over \" << op.second.size() << \" runs)\" << std::endl;\n";
+   gc += "      }\n";
+   gc += "      std::cout << std::string(50, '=') << \"\\n\" << std::endl;\n";
+   gc += "   }\n";
+   gc += "\n";
+
+   // Generate ResetProfilingResults function
+   gc += "   void ResetProfilingResults() {\n";
+   gc += "      fProfilingResults.clear();\n";
+   gc += "   }\n";
+   gc += "\n";
+
+   // Generate GetOpAvgTime function
+   gc += "   std::map<std::string, double> GetOpAvgTime() const {\n";
+   gc += "      if (fProfilingResults.empty()) {\n";
+   gc += "         return {};\n";
+   gc += "      }\n";
+   gc += "\n";
+   gc += "      std::map<std::string, double> avg;\n";
+   gc += "      for (const auto& op : fProfilingResults) {\n";
+   gc += "         double mean = 0.0;\n";
+   gc += "         for (double time : op.second) {\n";
+   gc += "            mean += time;\n";
+   gc += "         }\n";
+   gc += "         mean /= op.second.size();\n";
+   gc += "         avg[op.first] = mean;\n";
+   gc += "      }\n";
+   gc += "\n";
+   gc += "      return avg;\n";
+   gc += "   }\n";
+   gc += "\n";
+
+   // Generate GetOpVariance function 
+   gc += "   std::map<std::string, double> GetOpVariance() const {\n";
+   gc += "      if (fProfilingResults.empty()) {\n";
+   gc += "         return {};\n";
+   gc += "      }\n";
+   gc += "\n";
+   gc += "      std::map<std::string, double> variance;\n";
+   gc += "      for (const auto& op : fProfilingResults) {\n";
+   gc += "         // Var[X] = E[X^2] - E[X]^2\n";
+   gc += "         double mean = 0.0, mean2 = 0.0;\n";
+   gc += "         for (double time : op.second) {\n";
+   gc += "            mean += time;\n";
+   gc += "            mean2 += time * time;\n";
+   gc += "         }\n";
+   gc += "         mean /= op.second.size();\n";
+   gc += "         mean2 /= op.second.size();\n";
+   gc += "         variance[op.first] = mean2 - mean * mean;\n";
+   gc += "      }\n";
+   gc += "\n";
+   gc += "      return variance;\n";
+   gc += "   }\n";
+}
+
+// Main generation function for the profiler.
+void RModelProfiler::Generate()
+{
+   // Clear the profiler's code string to start fresh.
+   fModel.fProfilerGC.clear();
+   auto &gc = fModel.fProfilerGC;
+
+   // 1. Add the data member to the Session struct to store results.
+   gc += "public:\n";
+   gc += "   // Maps an operator name to a vector of its execution times (in microseconds).\n";
+   gc += "   std::map<std::string, std::vector<double>> fProfilingResults;\n\n";
+
+   // 2. Generate and add the utility functions like PrintProfilingResults.
+   GenerateUtilityFunctions();
+
+   // 3. Generate the signature for the profiled doInfer method.
+   std::string doInferSignature = fModel.GenerateInferSignature();
+   if (!doInferSignature.empty()) doInferSignature += ", ";
+   for (auto const &name : fModel.GetOutputTensorNames()) {
+      doInferSignature += " std::vector<" + ConvertTypeToString(fModel.GetTensorType(name)) + "> &output_tensor_" + name + ",";
+   }
+   if (!fModel.GetOutputTensorNames().empty()) {
+      doInferSignature.back() = ' ';
+   }
+   gc += "void doInfer(" + doInferSignature + ") {\n";
+
+   // 4. Generate the body of the doInfer method with timing instrumentation.
+   gc += "   // Timer variable for profiling\n";
+   gc += "   std::chrono::steady_clock::time_point tp_start, tp_overall_start;\n\n";
+   gc += "   tp_overall_start = std::chrono::steady_clock::now();\n\n";
+
+   for (size_t op_idx = 0; op_idx < fModel.fOperators.size(); ++op_idx) {
+      const auto& op = fModel.fOperators[op_idx];
+      gc += "   // -- Profiling for operator " + op->name + " --\n";
+      gc += "   tp_start = std::chrono::steady_clock::now();\n\n";
+      
+      // Add the actual operator inference code
+      gc += op->Generate(std::to_string(op_idx));
+      
+      // Add the code to stop the timer and store the result
+      gc += "\n   fProfilingResults[\"" + op->name + "\"].push_back(\n";
+      gc += "      std::chrono::duration_cast<std::chrono::duration<double, std::micro>>(\n";
+      gc += "         std::chrono::steady_clock::now() - tp_start).count());\n\n";
+   }
+
+   // 5. Generate the code to fill the output tensors.
+   gc += "   using TMVA::Experimental::SOFIE::UTILITY::FillOutput;\n\n";
+   for (std::string const &name : fModel.GetOutputTensorNames()) {
+      bool isIntermediate = fModel.fIntermediateTensorInfos.count(name) > 0;
+      std::string n = isIntermediate ? std::to_string(ConvertShapeToLength(fModel.GetTensorShape(name)))
+                                     : ConvertDynamicShapeToLength(fModel.GetDynamicTensorShape(name));
+      gc += "   FillOutput(tensor_" + name + ", output_tensor_" + name + ", " + n + ");\n";
+   }
+
+   gc += "\n   // -- Record overall inference time --\n";
+   gc += "   fProfilingResults[\"Overall_Time\"].push_back(\n";
+   gc += "      std::chrono::duration_cast<std::chrono::duration<double, std::micro>>(\n";
+   gc += "         std::chrono::steady_clock::now() - tp_overall_start).count());\n";
+
+   gc += "}\n\n"; // End of doInfer function
+}
+
+} // namespace SOFIE
+} // namespace Experimental
+} // namespace TMVA
diff --git a/tmva/sofie_parsers/src/RModelParser_ONNX.cxx b/tmva/sofie_parsers/src/RModelParser_ONNX.cxx
index 7b4ade2b6bc09..4903c8d1c6511 100644
--- a/tmva/sofie_parsers/src/RModelParser_ONNX.cxx
+++ b/tmva/sofie_parsers/src/RModelParser_ONNX.cxx
@@ -731,7 +731,8 @@ void RModelParser_ONNX::ParseONNXGraph(RModel & rmodel, const onnx::GraphProto &
          std::cout << "\t" << i << "  " << nodesOrder[i] << " parsing operator " << op_type << std::endl;
       }
 
-      std::unique_ptr<ROperator> op = ParseOperator(i, graph, nodesOrder, nodesChildren[i]);
+      std::unique_ptr<ROperator> op = ParseOperator(i, graph, nodesOrder, nodesChildren[nodesOrder[i]]);
+
       if (!op) {
          if (verbose) {
             std::cout << "\t\tskipping operator since it is fused with previous one" << std::endl;
@@ -739,6 +740,12 @@ void RModelParser_ONNX::ParseONNXGraph(RModel & rmodel, const onnx::GraphProto &
          // for skipping the fused nodes like Add after MatMul
          continue;
       }
+      const auto &nodeproto = graph.node(nodesOrder[i]);
+      op->name = nodeproto.name();
+      if (op->name.empty()) {
+          op->name = op_type + "_" + std::to_string(i);
+      }
+
       rmodel.AddOperator(std::move(op), node_order_exec++);
    }
 
diff --git a/tutorials/machine_learning/TMVA_SOFIE_ONNX.C b/tutorials/machine_learning/TMVA_SOFIE_ONNX.C
index 8c192789e1210..878167db8c791 100644
--- a/tutorials/machine_learning/TMVA_SOFIE_ONNX.C
+++ b/tutorials/machine_learning/TMVA_SOFIE_ONNX.C
@@ -19,7 +19,7 @@ void TMVA_SOFIE_ONNX(std::string inputFile = ""){
     SOFIE::RModel model = parser.Parse(inputFile, true);
 
     //Generating inference code
-    model.Generate();
+    model.Generate(SOFIE::Options::kProfile);
     // write the code in a file (by default Linear_16.hxx and Linear_16.dat
     model.OutputGenerated();