Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions tmva/sofie/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,7 @@ ROOT_STANDARD_LIBRARY_PACKAGE(ROOTTMVASofie
TMVA/OperatorList.hxx
TMVA/RModel_Base.hxx
TMVA/RModel.hxx
TMVA/RModelProfiler.hxx
TMVA/ROperator.hxx
TMVA/ROperator_BasicUnary.hxx
TMVA/ROperator_BasicBinary.hxx
Expand Down Expand Up @@ -77,6 +78,7 @@ ROOT_STANDARD_LIBRARY_PACKAGE(ROOTTMVASofie
SOURCES
src/RModel_Base.cxx
src/RModel.cxx
src/RModelProfiler.cxx
src/RModel_GNN.cxx
src/RModel_GraphIndependent.cxx
src/RFunction.cxx
Expand Down
8 changes: 6 additions & 2 deletions tmva/sofie/inc/TMVA/RModel.hxx
Original file line number Diff line number Diff line change
Expand Up @@ -11,12 +11,16 @@ namespace SOFIE {

class RModel final : public RModel_Base {

friend class RModelProfiler;

private:
bool fIsInitialized = false;
bool fIsSubGraph = false;
int fVerbose = 0;
int fBatchSize = -1;
long fReadPos = 0; // reading file position
std::string fProfilerGC = "";
bool fProfile = false;

OptimizationLevel fOptimizationLevel = OptimizationLevel::kExtended;

Expand Down Expand Up @@ -148,8 +152,8 @@ public:

void Initialize(int batchSize = -1, bool verbose = false);
void Initialize(const std::map<std::string,size_t> & inputParams, bool verbose = false);

void Generate(std::underlying_type_t<Options> options, int batchSize = -1, long pos = 0, bool verbose = false);
void Generate(std::underlying_type_t<Options> options, int batchSize = -1, long pos = 0, bool verbose = false);
void Generate(Options options = Options::kDefault, int batchSize = -1, int pos = 0, bool verbose = false)
{
Generate(static_cast<std::underlying_type_t<Options>>(options), batchSize, pos, verbose);
Expand Down
42 changes: 42 additions & 0 deletions tmva/sofie/inc/TMVA/RModelProfiler.hxx
Original file line number Diff line number Diff line change
@@ -0,0 +1,42 @@
#ifndef TMVA_SOFIE_RMODELPROFILER
#define TMVA_SOFIE_RMODELPROFILER

#include "TMVA/RModel.hxx"

namespace TMVA {
namespace Experimental {
namespace SOFIE {

/// \class RModelProfiler
/// \brief A helper class to generate profiled inference code for an RModel.
///
/// This class instruments the generated C++ code to measure the execution
/// time of each operator. It is invoked when the RModel::Generate is called
/// with the Options::kProfile flag.
///
/// The profiler only holds a reference to the model, so the model must
/// outlive the profiler. The generated code is written into the model itself
/// (the profiler is declared a friend of RModel for that purpose).
class RModelProfiler {
private:
   RModel &fModel; ///< Model whose inference code is being instrumented (not owned).

   /// Append the generated result-reporting helper methods to the model's
   /// profiler code buffer. Called from Generate().
   void GenerateUtilityFunctions();

public:
   // The profiler must be constructed with a model to work on.
   RModelProfiler() = delete;

   /// Bind the profiler to \p model.
   /// Marked explicit so that an RModel is never silently converted into a profiler.
   explicit RModelProfiler(RModel &model);
   ~RModelProfiler() = default;

   // There is no point in copying or moving an RModelProfiler
   RModelProfiler(const RModelProfiler &other) = delete;
   RModelProfiler(RModelProfiler &&other) = delete;
   RModelProfiler &operator=(const RModelProfiler &other) = delete;
   RModelProfiler &operator=(RModelProfiler &&other) = delete;

   /// Main function to generate the profiled code.
   /// Rebuilds the model's profiler code buffer from scratch on every call.
   void Generate();
};

} // namespace SOFIE
} // namespace Experimental
} // namespace TMVA

#endif // TMVA_SOFIE_RMODELPROFILER
1 change: 1 addition & 0 deletions tmva/sofie/inc/TMVA/RModel_Base.hxx
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,7 @@ enum class Options {
kRootBinaryWeightFile = 0x4,
kGNN = 0x8,
kGNNComponent = 0x10,
kProfile = 0x20,
};

// Optimization levels inspired by ONNXRuntime.
Expand Down
3 changes: 3 additions & 0 deletions tmva/sofie/inc/TMVA/ROperator.hxx
Original file line number Diff line number Diff line change
Expand Up @@ -37,6 +37,9 @@ public:
//virtual void Forward_blas() = 0;
virtual ~ROperator(){}

/// Name of this operator; stays "UnnamedOperator" unless it is assigned elsewhere.
/// Public data member, so it can be read and written directly.
std::string name = "UnnamedOperator";
/// Return a read-only reference to the operator name.
// NOTE(review): this accessor is not const-qualified, so it cannot be called on a
// const ROperator; consider making it a const member function.
const std::string &GetOperatorName() { return name; };

protected:

const std::string SP = " "; ///< space used to correctly indent the generated C++ code
Expand Down
49 changes: 29 additions & 20 deletions tmva/sofie/src/RModel.cxx
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@
#endif

#include "TMVA/RModel.hxx"
#include "TMVA/RModelProfiler.hxx"
#include "TMVA/SOFIE_common.hxx"

namespace TMVA {
Expand Down Expand Up @@ -850,7 +851,7 @@ void RModel::GenerateSessionCode()
CheckAndFlushIntermediateMemory(fOperators[op_idx]->GetOpInputTensors(), op_idx);
}

// to check remaining unused fragments after memory allocation (lesser the better)
// to check remaining unused fragments after memory allocation (lesser the better)
// for (const auto &it: fIntermediateMemoryInfo.available_stack){
// std::cout<<"chunk_idx: "<<it.first<<", chunk_size: "<<it.second<<"\n";
// }
Expand Down Expand Up @@ -878,13 +879,13 @@ void RModel::GenerateSessionCode()
// Generate code for Session constructor
if (fUseSession) {
std::string sessionName = "Session";
if (fIsSubGraph)
if (fIsSubGraph)
sessionName += "_" + fName;
// add here specific operator code that needs to define session data members
fGC += "\n";
for (size_t id = 0; id < fOperators.size(); id++) {
std::string opName = std::to_string(id);
fGC += fOperators[id]->GenerateSessionMembersCode(opName);
fGC += fOperators[id]->GenerateSessionMembersCode(opName);
}
fGC += "\n";
// here add initialization and reading of weight tensors
Expand Down Expand Up @@ -930,23 +931,28 @@ void RModel::GenerateSessionCode()
fGC += "}\n\n";
}

fGC += doInferSignature + "{\n";
fGC += "\n";
if (fProfile) {
RModelProfiler profiler(*this);
profiler.Generate();
fGC += fProfilerGC;
} else {
fGC += doInferSignature + "{\n";
fGC += "\n";

// generate the inference code
if (fVerbose)
std::cout << "Generating main inference code for " << fName << std::endl;
// generate the inference code
if (fVerbose)
std::cout << "Generating main inference code for " << fName << std::endl;

if (fOutputTensorNames.size() == 0)
throw std::runtime_error("TMVA-SOFIE: output size=0 are not supported");
if (fOutputTensorNames.size() == 0)
throw std::runtime_error("TMVA-SOFIE: output size=0 are not supported");

for (size_t op_idx = 0; op_idx < fOperators.size(); ++op_idx) {
if (fVerbose)
for (size_t op_idx = 0; op_idx < fOperators.size(); ++op_idx) {
if (fVerbose)
std::cout << "Generating code for operator .... " << op_idx << std::endl;
fGC += (fOperators[op_idx]->Generate(std::to_string(op_idx)));
}
fGC += (fOperators[op_idx]->Generate(std::to_string(op_idx)));
}

fGC += SP + "using TMVA::Experimental::SOFIE::UTILITY::FillOutput;\n\n";
fGC += SP + "using TMVA::Experimental::SOFIE::UTILITY::FillOutput;\n\n";

for (std::string const &name : fOutputTensorNames) {
// need to check is size is the same (don't want to return a vector with
Expand All @@ -957,7 +963,8 @@ void RModel::GenerateSessionCode()
fGC += SP + "FillOutput(tensor_" + name + ", output_tensor_" + name + ", " + n + ");\n";
}

fGC += "}\n\n";
fGC += "}\n\n";
}

// generate the inference overload that returns an output struct
GenerateOutput();
Expand All @@ -970,9 +977,11 @@ void RModel::GenerateSessionCode()

void RModel::Generate(std::underlying_type_t<Options> options, int batchSize, long pos, bool verbose)
{
bool profile = (options & static_cast<std::underlying_type_t<Options>>(Options::kProfile));
fVerbose = verbose;
fBatchSize = batchSize;
fReadPos = pos;
fProfile = profile;

// session flag is used in operator initialize
if (static_cast<std::underlying_type_t<Options>>(Options::kNoSession) & options) {
Expand All @@ -992,9 +1001,9 @@ void RModel::Generate(std::underlying_type_t<Options> options, int batchSize, lo
"TMVA-SOFIE: RModel::Generate: cannot use a separate weight file without generating a Session class");
}

if (static_cast<std::underlying_type_t<Options>>(Options::kGNN) & options)
if (static_cast<std::underlying_type_t<Options>>(Options::kGNN) & options)
fIsGNN = true;
if (static_cast<std::underlying_type_t<Options>>(Options::kGNNComponent) & options)
if (static_cast<std::underlying_type_t<Options>>(Options::kGNNComponent) & options)
fIsGNNComponent = true;

// initialize the model including all operators and sub-graphs
Expand All @@ -1008,13 +1017,13 @@ void RModel::Generate(std::underlying_type_t<Options> options, int batchSize, lo

// generate first code for the subgraphs
for (auto &graph : fSubGraphs) {
if (fVerbose)
if (fVerbose)
std::cout << "generate session code for subgraph " << graph->fName << std::endl;
graph->GenerateSessionCode();
fGC += graph->fGC;
}

if (fVerbose)
if (fVerbose)
std::cout << "generate Main session code - model " << fName << std::endl;

// generate main session code
Expand Down
161 changes: 161 additions & 0 deletions tmva/sofie/src/RModelProfiler.cxx
Original file line number Diff line number Diff line change
@@ -0,0 +1,161 @@
#include "TMVA/RModelProfiler.hxx"
#include "TMVA/SOFIE_common.hxx"

#include <stdexcept>
#include <string>

namespace TMVA {
namespace Experimental {
namespace SOFIE {

/// Bind the profiler to \p model and register on it every standard header
/// that the generated profiling code relies on.
RModelProfiler::RModelProfiler(RModel &model) : fModel(model)
{
   // Registration order is kept stable: one entry per header, with the reason it is needed.
   static const char *const kRequiredStdLibs[] = {
      "chrono",   // for timing operators
      "vector",   // for storing profiling results
      "string",   // for operator names
      "map",      // for the results map
      "iostream", // for printing results
      "iomanip",  // for printing results
   };

   for (const char *header : kRequiredStdLibs) {
      fModel.AddNeededStdLib(header);
   }
}

/// Append the source text of the profiling helper methods to the model's
/// profiler code buffer (fProfilerGC).
///
/// Four methods are emitted, in this order: PrintProfilingResults,
/// ResetProfilingResults, GetOpAvgTime and GetOpVariance. They all operate on
/// the generated `fProfilingResults` member and are meant to end up inside
/// the generated Session struct.
void RModelProfiler::GenerateUtilityFunctions()
{
   // Each constant below holds the complete text of one generated method.
   // Adjacent string literals are concatenated by the compiler, so the text
   // appended to the buffer is exactly the sum of the individual lines.

   // Prints the average time of every recorded entry.
   static const char *const kPrintResultsCode =
      " void PrintProfilingResults() const {\n"
      " if (fProfilingResults.empty()) {\n"
      " std::cout << \"No profiling results to display.\" << std::endl;\n"
      " return;\n"
      " }\n"
      "\n"
      " std::cout << \"\\n\" << std::string(50, '=') << std::endl;\n"
      " std::cout << \" AVERAGE PROFILING RESULTS\" << std::endl;\n"
      " std::cout << std::string(50, '=') << std::endl;\n"
      " for (const auto& op : fProfilingResults) {\n"
      " double sum = 0.0;\n"
      " for (double time : op.second) {\n"
      " sum += time;\n"
      " }\n"
      " double average = sum / op.second.size();\n"
      " std::cout << \" \" << std::left << std::setw(20) << op.first\n"
      " << \": \" << std::fixed << std::setprecision(6) << average << \" us\"\n"
      " << \" (over \" << op.second.size() << \" runs)\" << std::endl;\n"
      " }\n"
      " std::cout << std::string(50, '=') << \"\\n\" << std::endl;\n"
      " }\n"
      "\n";

   // Drops every recorded timing.
   static const char *const kResetResultsCode =
      " void ResetProfilingResults() {\n"
      " fProfilingResults.clear();\n"
      " }\n"
      "\n";

   // Returns the mean recorded time per entry.
   static const char *const kAverageTimeCode =
      " std::map<std::string, double> GetOpAvgTime() const {\n"
      " if (fProfilingResults.empty()) {\n"
      " return {};\n"
      " }\n"
      "\n"
      " std::map<std::string, double> avg;\n"
      " for (const auto& op : fProfilingResults) {\n"
      " double mean = 0.0;\n"
      " for (double time : op.second) {\n"
      " mean += time;\n"
      " }\n"
      " mean /= op.second.size();\n"
      " avg[op.first] = mean;\n"
      " }\n"
      "\n"
      " return avg;\n"
      " }\n"
      "\n";

   // Returns the variance of the recorded times per entry.
   static const char *const kVarianceCode =
      " std::map<std::string, double> GetOpVariance() const {\n"
      " if (fProfilingResults.empty()) {\n"
      " return {};\n"
      " }\n"
      "\n"
      " std::map<std::string, double> variance;\n"
      " for (const auto& op : fProfilingResults) {\n"
      " // Var[X] = E[X^2] - E[X]^2\n"
      " double mean = 0.0, mean2 = 0.0;\n"
      " for (double time : op.second) {\n"
      " mean += time;\n"
      " mean2 += time * time;\n"
      " }\n"
      " mean /= op.second.size();\n"
      " mean2 /= op.second.size();\n"
      " variance[op.first] = mean2 - mean * mean;\n"
      " }\n"
      "\n"
      " return variance;\n"
      " }\n";

   std::string &code = fModel.fProfilerGC;
   code += kPrintResultsCode;
   code += kResetResultsCode;
   code += kAverageTimeCode;
   code += kVarianceCode;
}

/// Main generation function for the profiler.
///
/// Rebuilds the model's profiler code buffer (fProfilerGC) from scratch with:
///  1. the `fProfilingResults` data member,
///  2. the reporting helpers emitted by GenerateUtilityFunctions(),
///  3. a `doInfer` method in which every operator is wrapped in timing code.
///
/// Each operator is recorded under the key `<operator name>_<operator index>`.
/// The index keeps operators that share a name (including the default
/// "UnnamedOperator") in separate entries, so the per-entry sample count
/// matches the number of inference runs.
///
/// \throws std::runtime_error if the model has no output tensors, matching
///         the check done by the non-profiled code path.
void RModelProfiler::Generate()
{
   // Clear the profiler's code string to start fresh.
   auto &gc = fModel.fProfilerGC;
   gc.clear();

   // Without outputs the signature built below would end in a dangling ", ",
   // so reject the model up front with the same message as the regular path.
   if (fModel.GetOutputTensorNames().empty())
      throw std::runtime_error("TMVA-SOFIE: output size=0 are not supported");

   // 1. Add the data member to the Session struct to store results.
   gc += "public:\n";
   gc += " // Maps an operator name to a vector of its execution times (in microseconds).\n";
   gc += " std::map<std::string, std::vector<double>> fProfilingResults;\n\n";

   // 2. Generate and add the utility functions like PrintProfilingResults.
   GenerateUtilityFunctions();

   // 3. Generate the signature for the profiled doInfer method.
   std::string doInferSignature = fModel.GenerateInferSignature();
   if (!doInferSignature.empty())
      doInferSignature += ", ";
   for (auto const &name : fModel.GetOutputTensorNames()) {
      doInferSignature += " std::vector<" + ConvertTypeToString(fModel.GetTensorType(name)) + "> &output_tensor_" + name + ",";
   }
   // At least one output exists (checked above): turn the trailing comma into a space.
   doInferSignature.back() = ' ';
   gc += "void doInfer(" + doInferSignature + ") {\n";

   // 4. Generate the body of the doInfer method with timing instrumentation.
   gc += " // Timer variables for profiling\n";
   gc += " std::chrono::steady_clock::time_point tp_start, tp_end, tp_overall_start;\n\n";
   gc += " tp_overall_start = std::chrono::steady_clock::now();\n\n";

   for (size_t op_idx = 0; op_idx < fModel.fOperators.size(); ++op_idx) {
      const auto &op = fModel.fOperators[op_idx];
      const std::string opIndex = std::to_string(op_idx);
      // Unique key per operator instance: the name alone is not unique.
      const std::string opLabel = op->name + "_" + opIndex;

      gc += " // -- Profiling for operator " + opLabel + " --\n";
      gc += " tp_start = std::chrono::steady_clock::now();\n\n";

      // Add the actual operator inference code
      gc += op->Generate(opIndex);

      // Stop the timer first, then store the result: reading the clock inside
      // the push_back argument would add the map lookup to the measured time.
      gc += "\n tp_end = std::chrono::steady_clock::now();\n";
      gc += " fProfilingResults[\"" + opLabel + "\"].push_back(\n";
      gc += " std::chrono::duration_cast<std::chrono::duration<double, std::micro>>(\n";
      gc += " tp_end - tp_start).count());\n\n";
   }

   // 5. Generate the code to fill the output tensors.
   gc += " using TMVA::Experimental::SOFIE::UTILITY::FillOutput;\n\n";
   for (std::string const &name : fModel.GetOutputTensorNames()) {
      // Intermediate tensors have a fixed shape; any other output uses its dynamic shape expression.
      bool isIntermediate = fModel.fIntermediateTensorInfos.count(name) > 0;
      std::string n = isIntermediate ? std::to_string(ConvertShapeToLength(fModel.GetTensorShape(name)))
                                     : ConvertDynamicShapeToLength(fModel.GetDynamicTensorShape(name));
      gc += " FillOutput(tensor_" + name + ", output_tensor_" + name + ", " + n + ");\n";
   }

   gc += "\n // -- Record overall inference time --\n";
   gc += " tp_end = std::chrono::steady_clock::now();\n";
   gc += " fProfilingResults[\"Overall_Time\"].push_back(\n";
   gc += " std::chrono::duration_cast<std::chrono::duration<double, std::micro>>(\n";
   gc += " tp_end - tp_overall_start).count());\n";

   gc += "}\n\n"; // End of doInfer function
}

} // namespace SOFIE
} // namespace Experimental
} // namespace TMVA
Loading