From 855380cedbb7d25c2cc65187d3d0a7c017110acb Mon Sep 17 00:00:00 2001 From: Jonas Rembser Date: Thu, 12 Mar 2026 09:20:41 +0100 Subject: [PATCH 1/2] Revert "[tmva][sofie] Fix NonZero to define max output shape values in Session ctor" This reverts commit 1f747b0bafd3a05e54217771a29a2cdb6b91da0f. The reason for the revert is that it's actually useful to have the maximum dynamic tensor size as a datamember of the Session, because then we can refactor the generated code such that it can be differentiated with Clad. --- tmva/sofie/inc/TMVA/ROperator.hxx | 4 +--- tmva/sofie/inc/TMVA/ROperator_NonZero.hxx | 8 ++++---- tmva/sofie/src/RModel.cxx | 5 ----- 3 files changed, 5 insertions(+), 12 deletions(-) diff --git a/tmva/sofie/inc/TMVA/ROperator.hxx b/tmva/sofie/inc/TMVA/ROperator.hxx index 6fac7958f8f9d..f0afd9c4374c1 100644 --- a/tmva/sofie/inc/TMVA/ROperator.hxx +++ b/tmva/sofie/inc/TMVA/ROperator.hxx @@ -25,9 +25,7 @@ public: virtual std::vector TypeInference(std::vector) { return {}; }; virtual void Initialize(RModel&) = 0; virtual std::string Generate(std::string OpName) = 0; //expect unique opName for each operator within the same RModel - // generate code for Session constructor before tensor allocation - virtual std::string GenerateSessionCtorCode() { return "";} - // generate initialization code for session constructor after tensor allocations + // generate initialization code for session constructor virtual std::string GenerateInitCode() { return "";} // generate some specific declaration code for Session virtual std::string GenerateDeclCode() { return "";} diff --git a/tmva/sofie/inc/TMVA/ROperator_NonZero.hxx b/tmva/sofie/inc/TMVA/ROperator_NonZero.hxx index fdf04665e0315..1fcb9cb45e74d 100644 --- a/tmva/sofie/inc/TMVA/ROperator_NonZero.hxx +++ b/tmva/sofie/inc/TMVA/ROperator_NonZero.hxx @@ -92,8 +92,8 @@ public: fShapeY.resize(2); fShapeY[0] = fShapeX.size(); - // flag -1 to define the shape variable in the constructor code and not in the constructor 
signature - fShapeY[1] = Dim{std::string("v_NonZero_") + fNX, static_cast(-1) }; + // identify as -1 since we will declare maximum as size of input + fShapeY[1] = Dim{std::string("v_NonZero_") + fNX, static_cast(-1)}; model.AddIntermediateTensor(fNY, ETensorType::INT64, fShapeY); if (model.Verbose()) { @@ -101,7 +101,7 @@ public: } } } - std::string GenerateSessionCtorCode() override { + std::string GenerateSessionMembersCode(std::string /*opName*/) override { if (fIsOutputConstant) return ""; // define output value used as max non zero with max size = input shape * N auto inputLength = ConvertDimShapeToLength(fShapeX); @@ -133,7 +133,7 @@ public: // loop on input indices out << SP << "size_t offset_" << opName << " = 0;\n"; - out << SP << "size_t " << vnonzero << " = 0;\n"; + out << SP << vnonzero << " = 0;\n"; for (size_t j = 0; j < dims; j++) { std::string index = "i_" + std::to_string(j); for (size_t k = 0; k <= j; k++) out << SP; diff --git a/tmva/sofie/src/RModel.cxx b/tmva/sofie/src/RModel.cxx index 0e6a7d7cf9bbf..e89e8513cf783 100644 --- a/tmva/sofie/src/RModel.cxx +++ b/tmva/sofie/src/RModel.cxx @@ -1152,11 +1152,6 @@ void RModel::GenerateSessionCode() } fGC += ") {\n"; - // add some code required in session constructor - for (size_t id = 0; id < fOperators.size(); id++) { - fGC += fOperators[id]->GenerateSessionCtorCode(); - } - // initializing dynamic parameters if (!fDimShapeNames.empty()) { fGC += "\n\n"; From ac44395a6b2f756a88b59f9896f8322051d33617 Mon Sep 17 00:00:00 2001 From: Jonas Rembser Date: Mon, 7 Apr 2025 13:42:12 +0200 Subject: [PATCH 2/2] [tmva][sofie] Restructure emitted code to be differentiable with Clad The idea of this commit is to refactor the `doInfer()` function that implements the inference from a member function of the `Session` struct to a free function that takes the `Session` by `const`-reference. 
This free function should only use the session struct and bare C-style arrays, so that Clad will have no problem differentiating it. A unit test for the differentiation of a simple MLP is implemented, embedded in the existing SOFIE tests. --- tmva/sofie/inc/TMVA/RModel.hxx | 4 +- tmva/sofie/inc/TMVA/ROperator_Gemm.hxx | 7 +- tmva/sofie/inc/TMVA/ROperator_LSTM.hxx | 121 +++++----- tmva/sofie/inc/TMVA/ROperator_NonZero.hxx | 6 +- tmva/sofie/inc/TMVA/ROperator_RNN.hxx | 46 ++-- tmva/sofie/inc/TMVA/SOFIE_common.hxx | 10 - tmva/sofie/src/RModel.cxx | 271 ++++++++++++++++++---- tmva/sofie/test/CMakeLists.txt | 14 +- tmva/sofie/test/TestCladAutodiff.cxx | 111 +++++++++ 9 files changed, 453 insertions(+), 137 deletions(-) create mode 100644 tmva/sofie/test/TestCladAutodiff.cxx diff --git a/tmva/sofie/inc/TMVA/RModel.hxx b/tmva/sofie/inc/TMVA/RModel.hxx index bc6493090f74e..3395de6d34166 100644 --- a/tmva/sofie/inc/TMVA/RModel.hxx +++ b/tmva/sofie/inc/TMVA/RModel.hxx @@ -35,8 +35,6 @@ private: std::vector fOutputTensorNames; std::vector fInputTensorNames; // input tensor names using ONNX order - - std::vector> fOperators; std::vector> fSubGraphs; /// CollectTensorMemberNames(const std::string &input); public: const std::vector & GetInputTensorNames() const { return fInputTensorNames; } diff --git a/tmva/sofie/inc/TMVA/ROperator_Gemm.hxx b/tmva/sofie/inc/TMVA/ROperator_Gemm.hxx index ecdd0b435fe37..83381baa39f0c 100644 --- a/tmva/sofie/inc/TMVA/ROperator_Gemm.hxx +++ b/tmva/sofie/inc/TMVA/ROperator_Gemm.hxx @@ -393,9 +393,12 @@ namespace SOFIE{ << (fAttrTransB ? "true, " : "false, ") << (fAttrTransA ? 
"true, " : "false, ") << n << ", " << m << ", " << k << ", "; - out << std::setprecision(std::numeric_limits::max_digits10) << fAttrAlpha << ", tensor_" << fNB; + // TODO: the cast to (float *) is not needed here from the C++ language perspective (the arguments to + // Gemm_Call are const already), but Clad bug https://github.com/vgvassilev/clad/issues/1721 is requiring + // us to do this cast to keep Clad working. Remove this hack once the Clad issue is fixed. + out << std::setprecision(std::numeric_limits::max_digits10) << fAttrAlpha << ", (float*)tensor_" << fNB; if (extraB) out << " + " << opName << "_B_offset"; - out << ", tensor_" << fNA; + out << ", (float*)tensor_" << fNA; // TODO: same here if (extraA) out << " + " << opName << "_A_offset"; out << ", " << std::setprecision(std::numeric_limits::max_digits10) << fAttrBeta << ","; // in the case of bias and no broadcasting needed diff --git a/tmva/sofie/inc/TMVA/ROperator_LSTM.hxx b/tmva/sofie/inc/TMVA/ROperator_LSTM.hxx index 84f37bc57da7e..ae0ee70c4eeea 100644 --- a/tmva/sofie/inc/TMVA/ROperator_LSTM.hxx +++ b/tmva/sofie/inc/TMVA/ROperator_LSTM.hxx @@ -390,45 +390,62 @@ std::string ROperator_LSTM::GenerateSessionMembersCode(std::string opName) size_t batch_size = (fAttrLayout == 0) ? 
fShapeX[1] : fShapeX[0]; size_t input_size = fShapeX[2]; + struct Block { + std::string name; + size_t size; + }; + + std::vector blocks; + + size_t ff_size = seq_length * batch_size * fAttrHiddenSize; + size_t hs_size = seq_length * num_directions * batch_size * fAttrHiddenSize; + + // Layout-dependent buffers if (fAttrLayout != 0) { - out << "std::vector<" << fType << "> fVec_" << opName << "_input = std::vector<" << fType << ">(" - << seq_length * batch_size * input_size << ");\n"; - out << "std::vector<" << fType << "> fVec_" << opName << "_initial_hidden_state = std::vector<" << fType << ">(" - << num_directions * batch_size * fAttrHiddenSize << ");\n"; - out << "std::vector<" << fType << "> fVec_" << opName << "_initial_cell_state = std::vector<" << fType << ">(" - << num_directions * batch_size * fAttrHiddenSize << ");\n"; + blocks.push_back({"input", seq_length * batch_size * input_size}); + blocks.push_back({"initial_hidden_state", num_directions * batch_size * fAttrHiddenSize}); + blocks.push_back({"initial_cell_state", num_directions * batch_size * fAttrHiddenSize}); } - // Set the feedforward - size_t ff_size = seq_length * batch_size * fAttrHiddenSize; - out << "std::vector<" << fType << "> fVec_" << opName << "_ff_input_gate = std::vector<" << fType << ">(" << ff_size - << ");\n"; - out << "std::vector<" << fType << "> fVec_" << opName << "_ff_output_gate = std::vector<" << fType << ">(" << ff_size - << ");\n"; - out << "std::vector<" << fType << "> fVec_" << opName << "_ff_cell_gate = std::vector<" << fType << ">(" << ff_size - << ");\n"; + + // Feedforward gates + blocks.push_back({"ff_input_gate", ff_size}); + blocks.push_back({"ff_output_gate", ff_size}); + blocks.push_back({"ff_cell_gate", ff_size}); if (fAttrInputForget == 0) - out << "std::vector<" << fType << "> fVec_" << opName << "_ff_forget_gate = std::vector<" << fType << ">(" - << ff_size << ");\n"; - // gate results - size_t hs_size = seq_length * num_directions * batch_size * 
fAttrHiddenSize; - out << "std::vector<" << fType << "> fVec_" << opName << "_input_gate = std::vector<" << fType << ">(" << hs_size - << ");\n"; - out << "std::vector<" << fType << "> fVec_" << opName << "_output_gate = std::vector<" << fType << ">(" << hs_size - << ");\n"; - out << "std::vector<" << fType << "> fVec_" << opName << "_cell_gate = std::vector<" << fType << ">(" << hs_size - << ");\n"; + blocks.push_back({"ff_forget_gate", ff_size}); + + // Gate outputs + blocks.push_back({"input_gate", hs_size}); + blocks.push_back({"output_gate", hs_size}); + blocks.push_back({"cell_gate", hs_size}); if (fAttrInputForget == 0) - out << "std::vector<" << fType << "> fVec_" << opName << "_forget_gate = std::vector<" << fType << ">(" << hs_size - << ");\n"; - // cell state - out << "std::vector<" << fType << "> fVec_" << opName << "_cell_state = std::vector<" << fType << ">(" << hs_size - << ");\n"; - out << "std::vector<" << fType << "> fVec_" << opName << "_new_cell_state = std::vector<" << fType << ">(" << hs_size - << ");\n"; - // hiddden state + blocks.push_back({"forget_gate", hs_size}); + + // Cell state + blocks.push_back({"cell_state", hs_size}); + blocks.push_back({"new_cell_state", hs_size}); + + // Hidden state (conditional) if (fAttrLayout != 0 || fNY.empty()) { - out << "std::vector<" << fType << "> fVec_" << opName << "_hidden_state = std::vector<" << fType << ">(" - << hs_size << ");\n"; + blocks.push_back({"hidden_state", hs_size}); + } + + // Compute total size + size_t total_size = 0; + for (const auto &b : blocks) { + total_size += b.size; + } + + // Backing storage + out << "std::vector<" << fType << "> fVec_" << opName << "_buffer = std::vector<" << fType << ">(" << total_size + << ");\n"; + + // Emit pointers + std::size_t offset = 0; + for (const auto &b : blocks) { + out << fType << "* fVec_" << opName << "_" << b.name << " = fVec_" << opName << "_buffer.data() + " << offset + << ";\n"; + offset += b.size; } out << "\n"; @@ -452,7 +469,7 @@ 
auto ROperator_LSTM::Generate(std::string OpName) -> std::string out << SP << fType << " const *" << OpName << "_input = tensor_" << fNX << ";\n"; } else { if (fUseSession) - out << SP << fType << " * " << OpName << "_input = this->fVec_" << OpName << "_input.data();\n"; + out << SP << fType << " * " << OpName << "_input = this->fVec_" << OpName << "_input;\n"; else out << SP << fType << " " << OpName << "_input[" << seq_length * batch_size * input_size << "] = {0};\n"; @@ -470,11 +487,11 @@ auto ROperator_LSTM::Generate(std::string OpName) -> std::string // Set the initial hidden state if (!fNInitial_h.empty()) { if (fAttrLayout == 0) { - out << SP << fType << " *" << OpName << "_initial_hidden_state = " << " tensor_" << fNInitial_h << ";\n"; + out << SP << fType << " const*" << OpName << "_initial_hidden_state = " << " tensor_" << fNInitial_h << ";\n"; } else { if (fUseSession) - out << SP << fType << " * " << OpName << "_initial_hidden_state = this->fVec_" << OpName - << "_initial_hidden_state.data();\n"; + out << SP << fType << " const* " << OpName << "_initial_hidden_state = this->fVec_" << OpName + << "_initial_hidden_state;\n"; else out << SP << fType << " " << OpName << "_initial_hidden_state[" << num_directions * batch_size * fAttrHiddenSize << "] = {0};\n"; @@ -494,11 +511,11 @@ auto ROperator_LSTM::Generate(std::string OpName) -> std::string // Set the initial cell state if (!fNInitial_c.empty()) { if (fAttrLayout == 0) { - out << SP << fType << " *" << OpName << "_initial_cell_state = " << " tensor_" << fNInitial_c << ";\n"; + out << SP << fType << " const*" << OpName << "_initial_cell_state = " << " tensor_" << fNInitial_c << ";\n"; } else { if (fUseSession) - out << SP << fType << " * " << OpName << "_initial_cell_state = this->fVec_" << OpName - << "_initial_cell_state.data();\n"; + out << SP << fType << " const* " << OpName << "_initial_cell_state = this->fVec_" << OpName + << "_initial_cell_state;\n"; else out << SP << fType << " " << OpName << 
"_initial_cell_state[" << num_directions * batch_size * fAttrHiddenSize << "] = {0};\n"; @@ -518,12 +535,12 @@ auto ROperator_LSTM::Generate(std::string OpName) -> std::string // Set the feedforward size_t ff_size = seq_length * batch_size * fAttrHiddenSize; if (fUseSession) { - out << SP << fType << " * " << OpName << "_ff_input_gate = this->fVec_" << OpName << "_ff_input_gate.data();\n"; - out << SP << fType << " * " << OpName << "_ff_output_gate = this->fVec_" << OpName << "_ff_output_gate.data();\n"; - out << SP << fType << " * " << OpName << "_ff_cell_gate = this->fVec_" << OpName << "_ff_cell_gate.data();\n"; + out << SP << fType << " * " << OpName << "_ff_input_gate = this->fVec_" << OpName << "_ff_input_gate;\n"; + out << SP << fType << " * " << OpName << "_ff_output_gate = this->fVec_" << OpName << "_ff_output_gate;\n"; + out << SP << fType << " * " << OpName << "_ff_cell_gate = this->fVec_" << OpName << "_ff_cell_gate;\n"; if (fAttrInputForget == 0) { out << SP << fType << " * " << OpName << "_ff_forget_gate = this->fVec_" << OpName - << "_ff_forget_gate.data();\n"; + << "_ff_forget_gate;\n"; } } else { out << SP << fType << " " << OpName << "_ff_input_gate[" << ff_size << "] = {0};\n"; @@ -536,11 +553,11 @@ auto ROperator_LSTM::Generate(std::string OpName) -> std::string // Set the gates size_t hidden_state_size = seq_length * num_directions * batch_size * fAttrHiddenSize; if (fUseSession) { - out << SP << fType << " * " << OpName << "_input_gate = this->fVec_" << OpName << "_input_gate.data();\n"; - out << SP << fType << " * " << OpName << "_output_gate = this->fVec_" << OpName << "_output_gate.data();\n"; - out << SP << fType << " * " << OpName << "_cell_gate = this->fVec_" << OpName << "_cell_gate.data();\n"; + out << SP << fType << " * " << OpName << "_input_gate = this->fVec_" << OpName << "_input_gate;\n"; + out << SP << fType << " * " << OpName << "_output_gate = this->fVec_" << OpName << "_output_gate;\n"; + out << SP << fType << " * " << OpName 
<< "_cell_gate = this->fVec_" << OpName << "_cell_gate;\n"; if (fAttrInputForget == 0) { - out << SP << fType << " * " << OpName << "_forget_gate = this->fVec_" << OpName << "_forget_gate.data();\n"; + out << SP << fType << " * " << OpName << "_forget_gate = this->fVec_" << OpName << "_forget_gate;\n"; } } else { out << SP << fType << " " << OpName << "_input_gate[" << hidden_state_size << "] = {0};\n"; @@ -552,8 +569,8 @@ auto ROperator_LSTM::Generate(std::string OpName) -> std::string } // Set the cell state and the new cell state = h(cell state) if (fUseSession) { - out << SP << fType << " * " << OpName << "_cell_state = this->fVec_" << OpName << "_cell_state.data();\n"; - out << SP << fType << " * " << OpName << "_new_cell_state = this->fVec_" << OpName << "_new_cell_state.data();\n"; + out << SP << fType << " * " << OpName << "_cell_state = this->fVec_" << OpName << "_cell_state;\n"; + out << SP << fType << " * " << OpName << "_new_cell_state = this->fVec_" << OpName << "_new_cell_state;\n"; } else { out << SP << fType << " " << OpName << "_cell_state[" << hidden_state_size << "] = {0};\n"; out << SP << fType << " " << OpName << "_new_cell_state[" << hidden_state_size << "] = {0};\n"; @@ -564,7 +581,7 @@ auto ROperator_LSTM::Generate(std::string OpName) -> std::string out << SP << fType << " *" << OpName << "_hidden_state = tensor_" << fNY << ";\n"; } else { if (fUseSession) { - out << SP << fType << " * " << OpName << "_hidden_state = this->fVec_" << OpName << "_hidden_state.data();\n"; + out << SP << fType << " * " << OpName << "_hidden_state = this->fVec_" << OpName << "_hidden_state;\n"; } else { out << SP << fType << " " << OpName << "_hidden_state[" << hidden_state_size << "] = {0};\n"; } diff --git a/tmva/sofie/inc/TMVA/ROperator_NonZero.hxx b/tmva/sofie/inc/TMVA/ROperator_NonZero.hxx index 1fcb9cb45e74d..8587035f8d44b 100644 --- a/tmva/sofie/inc/TMVA/ROperator_NonZero.hxx +++ b/tmva/sofie/inc/TMVA/ROperator_NonZero.hxx @@ -101,16 +101,16 @@ public: } } 
} + std::string GenerateSessionMembersCode(std::string /*opName*/) override { if (fIsOutputConstant) return ""; // define output value used as max non zero with max size = input shape * N auto inputLength = ConvertDimShapeToLength(fShapeX); std::stringstream out; - out << SP << "size_t v_NonZero_" << fNX << " = " << inputLength << ";\n"; + out << SP << "size_t fV_NonZero_" << fNX << " = " << inputLength << ";\n"; return out.str(); } - std::string Generate(std::string opName) override { if (fIsOutputConstant) { return ""; @@ -133,7 +133,7 @@ public: // loop on input indices out << SP << "size_t offset_" << opName << " = 0;\n"; - out << SP << vnonzero << " = 0;\n"; + out << SP << "size_t " << vnonzero << " = 0;\n"; for (size_t j = 0; j < dims; j++) { std::string index = "i_" + std::to_string(j); for (size_t k = 0; k <= j; k++) out << SP; diff --git a/tmva/sofie/inc/TMVA/ROperator_RNN.hxx b/tmva/sofie/inc/TMVA/ROperator_RNN.hxx index 0667047eed228..f385a502d4077 100644 --- a/tmva/sofie/inc/TMVA/ROperator_RNN.hxx +++ b/tmva/sofie/inc/TMVA/ROperator_RNN.hxx @@ -308,18 +308,38 @@ std::string ROperator_RNN::GenerateSessionMembersCode(std::string opName) size_t batch_size = (fAttrLayout == 0) ? 
fShapeX[1] : fShapeX[0]; size_t input_size = fShapeX[2]; + struct Block { + std::string name; + size_t size; + }; + + std::vector blocks; + if (fAttrLayout != 0) { - out << "std::vector<" << fType << "> fVec_" << opName << "_input = std::vector<" << fType << ">(" - << seq_length * batch_size * input_size << ");\n"; - out << "std::vector<" << fType << "> fVec_" << opName << "_initial_hidden_state = std::vector<" << fType << ">(" - << num_directions * batch_size * fAttrHiddenSize << ");\n"; + blocks.push_back({"input", seq_length * batch_size * input_size}); + blocks.push_back({"initial_hidden_state", num_directions * batch_size * fAttrHiddenSize}); } - out << "std::vector<" << fType << "> fVec_" << opName << "_feedforward = std::vector<" << fType << ">(" - << seq_length * batch_size * fAttrHiddenSize << ");\n"; - + blocks.push_back({"feedforward", seq_length * batch_size * fAttrHiddenSize}); if (fAttrLayout != 0 || fNY.empty()) { - out << "std::vector<" << fType << "> fVec_" << opName << "_hidden_state = std::vector<" << fType << ">(" - << seq_length * num_directions * batch_size * fAttrHiddenSize << ");\n"; + blocks.push_back({"hidden_state", seq_length * num_directions * batch_size * fAttrHiddenSize}); + } + + // Compute total size + size_t total_size = 0; + for (const auto &b : blocks) { + total_size += b.size; + } + + // Emit backing storage + out << "std::vector<" << fType << "> fVec_" << opName << "_buffer = std::vector<" << fType << ">(" << total_size + << ");\n"; + + // Emit pointers + std::size_t offset = 0; + for (const auto &b : blocks) { + out << fType << "* fVec_" << opName << "_" << b.name << " = fVec_" << opName << "_buffer.data() + " << offset + << ";\n"; + offset += b.size; } out << "\n"; @@ -346,7 +366,7 @@ auto ROperator_RNN::Generate(std::string OpName) -> std::string } } else { if (fUseSession) - out << SP << fType << " * " << OpName << "_input = this->fVec_" << OpName << "_input.data();\n"; + out << SP << fType << " * " << OpName << "_input = 
this->fVec_" << OpName << "_input;\n"; else out << SP << fType << " " << OpName << "_input[" << seq_length * batch_size * input_size << "];\n"; out << SP << "for(size_t seq = 0; seq < " << seq_length << "; seq++) {\n"; @@ -367,7 +387,7 @@ auto ROperator_RNN::Generate(std::string OpName) -> std::string } else { if (fUseSession) out << SP << fType << " * " << OpName << "_initial_hidden_state = this->fVec_" << OpName - << "_initial_hidden_state.data();\n"; + << "_initial_hidden_state;\n"; else out << fType << " " << OpName << "_initial_hidden_state[" << num_directions * batch_size * fAttrHiddenSize << "] = {0};\n"; @@ -385,7 +405,7 @@ auto ROperator_RNN::Generate(std::string OpName) -> std::string } if (fUseSession) - out << SP << fType << " * " << OpName << "_feedforward = this->fVec_" << OpName << "_feedforward.data();\n"; + out << SP << fType << " * " << OpName << "_feedforward = this->fVec_" << OpName << "_feedforward;\n"; else out << SP << fType << " " << OpName << "_feedforward[" << seq_length * batch_size * fAttrHiddenSize << "] = {0};\n"; @@ -395,7 +415,7 @@ auto ROperator_RNN::Generate(std::string OpName) -> std::string out << SP << fType << " *" << OpName << "_hidden_state = tensor_" << fNY << ";\n"; } else { if (fUseSession) - out << SP << fType << " * " << OpName << "_hidden_state = this->fVec_" << OpName << "_hidden_state.data();\n"; + out << SP << fType << " * " << OpName << "_hidden_state = this->fVec_" << OpName << "_hidden_state;\n"; else out << SP << fType << " " << OpName << "_hidden_state[" << seq_length * num_directions * batch_size * fAttrHiddenSize << "] = {0};\n"; diff --git a/tmva/sofie/inc/TMVA/SOFIE_common.hxx b/tmva/sofie/inc/TMVA/SOFIE_common.hxx index fddc07a85fc08..8769193080b39 100644 --- a/tmva/sofie/inc/TMVA/SOFIE_common.hxx +++ b/tmva/sofie/inc/TMVA/SOFIE_common.hxx @@ -681,16 +681,6 @@ void col2im(const Dtype* data_col, const int channels, //std::cout << "finishing col2imp" << std::endl; } -// Used at the end of infer() to fill the 
return object. -template -void FillOutput(T const *arr, std::vector &out, std::size_t n) -{ - out.resize(n); - for (std::size_t i = 0; i < n; ++i) { - out[i] = arr[i]; - } -} - } // end namespace UTILITY namespace BLAS{ diff --git a/tmva/sofie/src/RModel.cxx b/tmva/sofie/src/RModel.cxx index e89e8513cf783..74e672779fcf5 100644 --- a/tmva/sofie/src/RModel.cxx +++ b/tmva/sofie/src/RModel.cxx @@ -11,14 +11,34 @@ #include "TMVA/RModel.hxx" #include "TMVA/SOFIE_common.hxx" -namespace TMVA { -namespace Experimental { -namespace SOFIE { +namespace TMVA::Experimental::SOFIE { namespace { + const std::string SP = " "; + +void ReplaceAll(std::string &str, const std::string &from, const std::string &to) +{ + size_t pos = 0; + while ((pos = str.find(from, pos)) != std::string::npos) { + str.replace(pos, from.length(), to); + pos += to.length(); + } +} + +bool IsIdentifierChar(char c) +{ + return std::isalnum(static_cast(c)) || c == '_'; } +// Get the data member name corresponding to a tensor with a given name. 
+std::string TensorMember(std::string const &name) +{ + return "tensor_" + name; +} + +} // namespace + std::underlying_type_t operator|(Options opA, Options opB) { return static_cast>(opA) | static_cast>(opB); } @@ -26,6 +46,7 @@ std::underlying_type_t operator|(std::underlying_type_t opA, O return opA | static_cast>(opB); } + std::vector RModel::GetTensorShape(const std::string & name) const { auto f = fReadyInputTensorInfos.find(name); if (f != fReadyInputTensorInfos.end()) { @@ -356,7 +377,7 @@ std::string RModel::AllocateIntermediateMemory(std::span std::string typeName = ConvertTypeToString(GetTensorType(name)); code << "\n // Allocating memory for intermediate tensor " << name << " with size " << size << " bytes"; code << "\n" - << typeName << "* tensor_" << name << " = reinterpret_cast<" << typeName + << typeName << "* " << TensorMember(name) << " = reinterpret_cast<" << typeName << "*>(fIntermediateMemoryPool.data() + " << location << ");\n"; }; @@ -714,7 +735,8 @@ std::string GenerateConstantTensorCode(const std::pair fTensor_" << t.first << " = "; if (sameData) @@ -722,7 +744,7 @@ std::string GenerateConstantTensorCode(const std::pair(i); - fConstantTensorSize += ConvertShapeToLength(i.second.shape()) * 4; + fConstantTensorSize += length * sizeof(float); } else if (i.second.type() == ETensorType::INT64) { fGC += GenerateConstantTensorCode(i); - fConstantTensorSize += ConvertShapeToLength(i.second.shape()) * 8; + fConstantTensorSize += length * sizeof(int64_t); } } else { // case of tensors which are read from a file - size_t length = ConvertShapeToLength(i.second.shape()); if (i.second.type() == ETensorType::FLOAT) { fGC += "std::vector fTensor_" + i.first + " = std::vector(" + std::to_string(length) + ");\n"; - fGC += "float * tensor_" + i.first + " = fTensor_" + i.first + ".data();\n"; - fWeightsTensorSize += ConvertShapeToLength(i.second.shape()) * 4; + fGC += "float * " + TensorMember(i.first) + " = fTensor_" + i.first + ".data();\n"; + 
fWeightsTensorSize += length * sizeof(float); } } } @@ -774,7 +796,7 @@ void RModel::GenerateIntermediateTensorInfo() { bool is_alias = (IsAliasTensor(i.first)); if (i.second.type == ETensorType::BOOL && !is_alias) { tensor_declaration_block += "std::vector fTensor_" + i.first + " = std::vector(" + std::to_string(ConvertShapeToLength(i.second.shape)) + ");\n"; - tensor_declaration_block += "std::uint8_t * tensor_" + i.first + " = fTensor_" + i.first + ".data();\n"; + tensor_declaration_block += "std::uint8_t * " + TensorMember(i.first) + " = fTensor_" + i.first + ".data();\n"; continue; } bool is_extended = (fOptimizationLevel == OptimizationLevel::kExtended); @@ -788,22 +810,22 @@ void RModel::GenerateIntermediateTensorInfo() { if (i.second.type == ETensorType::FLOAT) { tensor_declaration_block += "std::vector fTensor_" + i.first + " = std::vector(" + std::to_string(length) + ");\n"; - tensor_declaration_block += "float * tensor_" + i.first + " = fTensor_" + i.first + ".data();\n"; + tensor_declaration_block += "float * " + TensorMember(i.first) + " = fTensor_" + i.first + ".data();\n"; fOtherTensorSize += 4 * length; } else if (i.second.type == ETensorType::DOUBLE) { tensor_declaration_block += "std::vector fTensor_" + i.first + " = std::vector(" + std::to_string(length) + ");\n"; - tensor_declaration_block += "double * tensor_" + i.first + " = fTensor_" + i.first + ".data();\n"; + tensor_declaration_block += "double * " + TensorMember(i.first) + " = fTensor_" + i.first + ".data();\n"; fOtherTensorSize += 8 * length; } else if (i.second.type == ETensorType::INT64) { tensor_declaration_block += "std::vector fTensor_" + i.first + " = std::vector(" + std::to_string(length) + ");\n"; - tensor_declaration_block += "int64_t * tensor_" + i.first + " = fTensor_" + i.first + ".data();\n"; + tensor_declaration_block += "int64_t * " + TensorMember(i.first) + " = fTensor_" + i.first + ".data();\n"; fOtherTensorSize += 8 * length; } } if (is_alias) { - 
tensor_declaration_block += ConvertTypeToString(i.second.type) + " * tensor_" + i.first + " = nullptr;\n"; + tensor_declaration_block += ConvertTypeToString(i.second.type) + " * " + TensorMember(i.first) + " = nullptr;\n"; } } @@ -816,7 +838,7 @@ void RModel::GenerateIntermediateTensorInfo() { if (!fDynamicTensorInfos.empty()) { fGC += "//--- declare the dynamic tensors\n"; for (auto &i : fDynamicTensorInfos) { - fGC += ConvertTypeToString(i.second.type) + " * tensor_" + i.first + " = nullptr;\n"; + fGC += ConvertTypeToString(i.second.type) + " * " + TensorMember(i.first) + " = nullptr;\n"; } fGC += "//--- dynamic tensors pool\n"; fGC += "std::vector fDynamicMemoryPool;\n"; @@ -862,9 +884,10 @@ void RModel::GenerateDynamicTensorInfo() auto op_ptr = op.get(); std::cout << "Looping on operator " << op_index << " " << typeid(*op_ptr).name() << std::endl; } - // check if is a dynamic tensor and not an alias tensor + // check if is a dynamic tensor and not an alias tensor or output tensor std::string name = std::string(it); - if ( fDynamicTensorInfos.find(name) != fDynamicTensorInfos.end() && !IsAliasTensor(name)) { + if ( fDynamicTensorInfos.find(name) != fDynamicTensorInfos.end() && !IsAliasTensor(name) + && std::find(fOutputTensorNames.begin(), fOutputTensorNames.end(), name) == fOutputTensorNames.end()) { auto tensor_size = ConvertDimShapeToLength(GetDimTensorShape(name)); auto type = GetTensorType(name); size_t type_size = GetTypeSize(type); @@ -901,6 +924,7 @@ void RModel::GenerateDynamicTensorInfo() bool missingTensor = false; for (auto &i : fDynamicTensorInfos) { if (IsAliasTensor(i.first)) continue; + if (std::find(fOutputTensorNames.begin(), fOutputTensorNames.end(), i.first) != fOutputTensorNames.end()) continue; if (std::find(tensors.begin(), tensors.end(), std::pair{i.first, i.second.type}) == tensors.end()) { std::cout << "Dynamic tensors " << i.first << " is not in list of operator input/output " << std::endl; missingTensor = true; @@ -912,6 +936,83 @@ 
void RModel::GenerateDynamicTensorInfo() fGC += out.str(); } +/// Check if a given parameter is used for the shape of an input tensor. +bool RModel::IsInputTensorShapeParam(std::string const &paramName) const +{ + for (auto &name : fInputTensorNames) { + if (IsDimInputTensor(name)) { + auto shape = GetDynamicTensorShape(name); + for (auto &d : shape) { + if (d.param == paramName) + return true; + } + } + } + return false; +} + +/// Collects all identifiers starting with "tensor_" in the input code, +/// provided that the occurrence is not immediately preceded by a +/// character that is valid in a C++ identifier. Excludes input and output tensor names. +/// Returns a deduplicated std::vector. +std::vector<std::string> RModel::CollectTensorMemberNames(const std::string &input) +{ + const std::string target = "tensor_"; + + std::vector<std::string> result; + + for (size_t i = 0; i < input.size();) { + + bool doCollect = false; + + if (i + target.size() <= input.size() && input.compare(i, target.size(), target) == 0 && + (i == 0 || !IsIdentifierChar(input[i - 1]))) { + + doCollect = true; + + std::size_t j = i + target.size(); + + // Extend to full identifier + while (j < input.size() && IsIdentifierChar(input[j])) + ++j; + + std::string fullName = input.substr(i, j - i); + + // Exclude input tensor names + for (std::string const &name : fInputTensorNames) { + if (fullName == target + name) { + doCollect = false; + break; + } + } + + // Exclude output tensor names + if (doCollect) { + for (std::string const &name : fOutputTensorNames) { + if (fullName == target + name) { + doCollect = false; + break; + } + } + } + + if (doCollect) { + result.push_back(fullName); + } + + i = j; // advance past the identifier + } else { + ++i; + } + } + + // Deduplicate (order not preserved) + std::sort(result.begin(), result.end()); + result.erase(std::unique(result.begin(), result.end()), result.end()); + + return result; +} + std::string RModel::GenerateInferSignature(bool isdecl) { // generate the infer
signature given the inputs: eg. "float * tensor1, float * tensor2" // if (decl = false) generate only calling signature (tensor1,tensor2,....) @@ -1004,8 +1105,24 @@ void RModel::GenerateOutput() if (!doInferArgs.empty()) doInferArgs += ","; for (std::string const &name : fOutputTensorNames) { - fGC += SP + "std::vector<" + typeForOutput(GetTensorType(name)) + " > output_tensor_" + name + ";\n"; - doInferArgs += " output_tensor_" + name + ","; + bool isDynamic = fDynamicTensorInfos.count(name) > 0; + std::string n; + if(!isDynamic) { + n = std::to_string(ConvertShapeToLength(GetTensorShape(name))); + } else { + n = memberNameForDimShape(ConvertDimShapeToLength(GetDynamicTensorShape(name))); + } + std::string outputName = "output_tensor_" + name; + fGC += SP + "std::vector<" + typeForOutput(GetTensorType(name)) + " > " + outputName + "(" + n + ");\n"; + doInferArgs += " " + outputName + ".data(),"; + if(isDynamic) { + for (auto const &dim : GetDynamicTensorShape(name)) { + if (dim.isParam && !IsInputTensorShapeParam(dim.param)) { + fGC += SP + "size_t " + dim.param + " = 0;\n"; + doInferArgs += " " + dim.param + ","; + } + } + } } if (!doInferArgs.empty()) doInferArgs.back() = ' '; @@ -1031,7 +1148,21 @@ void RModel::GenerateOutput() } } - fGC += SP + "doInfer(" + doInferArgs + ");\n"; + if (fUseSession) { + fGC += SP + "doInfer(*this, " + doInferArgs + ");\n"; + } else { + fGC += SP + "doInfer(" + doInferArgs + ");\n"; + } + + // If the output tensors have dynamic sizes, now is the time to set them + for (std::string const &name : fOutputTensorNames) { + bool isDynamic = fDynamicTensorInfos.count(name) > 0; + if (isDynamic) { + std::string outputName = "output_tensor_" + name; + auto tensor_size = ConvertDimShapeToLength(GetDimTensorShape(name)); + fGC += " " + outputName + ".resize(" + tensor_size + ");\n"; + } + } fGC += SP + "return {"; for (size_t i = 0; i < fOutputTensorNames.size(); i++) { @@ -1045,23 +1176,43 @@ void RModel::GenerateOutput() void 
RModel::GenerateSessionCode() { + std::string sessionName = !fIsSubGraph ? "Session" : "Session_" + fName; + + if (fUseSession && !fIsGNNComponent) { + // forward declare session struct + fGC += "struct " + sessionName + ";\n"; + } + // Determine the signature of the actual inference function std::string doInferSignature = GenerateInferSignature(); if (!doInferSignature.empty()) doInferSignature += ", "; for (auto const &name : fOutputTensorNames) { - doInferSignature += " std::vector<" + typeForOutput(GetTensorType(name)) + "> &output_tensor_" + name + ","; + bool isDynamic = fDynamicTensorInfos.count(name) > 0; + doInferSignature += typeForOutput(GetTensorType(name)) + " *tensor_" + name + ","; + if(isDynamic) { + for (auto const &dim : GetDynamicTensorShape(name)) { + if (dim.isParam && !IsInputTensorShapeParam(dim.param)) + doInferSignature += " size_t &" + dim.param + "_output,"; + } + } } doInferSignature.back() = ' '; - doInferSignature = "void doInfer(" + doInferSignature + ")"; + if (fUseSession) { + doInferSignature = sessionName + " const &session, " + doInferSignature; + } + + doInferSignature = "inline void doInfer(" + doInferSignature + ")"; + + if (!fIsGNNComponent) { + // forward declare inference implementation + fGC += doInferSignature + ";\n"; + } // define the Session struct (for GNN this is generated in RModel_GNN) if (fUseSession && !fIsGNNComponent) { - if (!fIsSubGraph) - fGC += "struct Session {\n"; - else - fGC += "struct Session_" + fName + " {\n"; + fGC += "struct " + sessionName + " {\n"; } // generate code for declaring the initialized tensors @@ -1115,9 +1266,6 @@ void RModel::GenerateSessionCode() // Generate code for Session constructor if (fUseSession) { - std::string sessionName = "Session"; - if (fIsSubGraph) - sessionName += "_" + fName; // add here specific operator code that needs to define session data members fGC += "\n"; for (size_t id = 0; id < fOperators.size(); id++) { @@ -1179,7 +1327,15 @@ void 
RModel::GenerateSessionCode() fGC += "}\n\n"; } - fGC += doInferSignature + "{\n"; + // generate the inference overload that returns an output struct + GenerateOutput(); + + // end of session + if (fUseSession && !fIsGNNComponent) { + fGC += "}; // end of Session\n\n"; + } + + fGC += doInferSignature + " {\n"; fGC += "\n"; // generate the inference code @@ -1189,32 +1345,47 @@ void RModel::GenerateSessionCode() if (fOutputTensorNames.size() == 0) throw std::runtime_error("TMVA-SOFIE: output size=0 are not supported"); + std::string allOperatorCode; + for (size_t op_idx = 0; op_idx < fOperators.size(); ++op_idx) { if (fVerbose) std::cout << "Generating code for operator .... " << op_idx << std::endl; - fGC += (fOperators[op_idx]->Generate(std::to_string(op_idx))); + std::string operatorCode = fOperators[op_idx]->Generate(std::to_string(op_idx)); + allOperatorCode += operatorCode; } - fGC += SP + "using TMVA::Experimental::SOFIE::UTILITY::FillOutput;\n\n"; + // If the generated code uses members of the session struct, use the + // local variable name that we're using for the session: + ReplaceAll(allOperatorCode, "this->", "session."); - for (std::string const &name : fOutputTensorNames) { - // need to check is size is the same (don't want to return a vector with - // larger size) in that case better to copy - bool isIntermediate = fIntermediateTensorInfos.count(name) > 0; - std::string n = isIntermediate ? std::to_string(ConvertShapeToLength(GetTensorShape(name))) : ConvertDimShapeToLength(GetDimTensorShape(name)); - fGC += SP + "FillOutput(tensor_" + name + ", output_tensor_" + name + ", " + n + ");\n"; + if (fUseSession && !fIsGNNComponent) { + // Collect all "tensor_*" data members that are not input or output tensors + std::vector<std::string> tensorMemberNames = CollectTensorMemberNames(allOperatorCode); + for (auto const& name: tensorMemberNames) { + fGC += " auto &" + name + " = session."
+ name + ";\n"; + } + fGC += "\n"; } - fGC += "}\n\n"; - - // generate the inference overload that returns an output struct - GenerateOutput(); + fGC += allOperatorCode; - // end of session - if (fUseSession && !fIsGNNComponent) { - fGC += "}; // end of Session\n\n"; + for (auto const& name: fOutputTensorNames) { + bool isDynamic = fDynamicTensorInfos.count(name) > 0; + if(isDynamic) { + for (auto const &dim : GetDynamicTensorShape(name)) { + if (dim.isParam && !IsInputTensorShapeParam(dim.param)) + fGC += " " + dim.param + "_output = " + dim.param + ";\n"; + } + } + if(IsConstantTensor(name)) { + std::string t = "session.tensor_" + name; + size_t length = ConvertShapeToLength(fInitializedTensors[name].shape()); + fGC += " std::copy(" + t + ", " + t + " + " + std::to_string(length) + ", tensor_" + name + ");\n"; + } } + fGC += "\n"; + + fGC += "}\n"; } void RModel::Generate(std::underlying_type_t<Options> options, int batchSize, long pos, bool verbose) @@ -1648,6 +1819,4 @@ void RModel::Streamer(TBuffer &R__b) { } } -}//SOFIE -}//Experimental -}//TMVA +} // namespace TMVA::Experimental::SOFIE diff --git a/tmva/sofie/test/CMakeLists.txt b/tmva/sofie/test/CMakeLists.txt index 1a9295237cff1..120b5800b2f8d 100644 --- a/tmva/sofie/test/CMakeLists.txt +++ b/tmva/sofie/test/CMakeLists.txt @@ -48,14 +48,20 @@ ROOTTEST_ADD_TEST(SofieCompileModels_ONNX # Creating a Google Test if (BLAS_FOUND) # we need BLAS for compiling the models ROOT_EXECUTABLE(TestCustomModelsFromONNX TestCustomModelsFromONNX.cxx - LIBRARIES - Core - GTest::gtest - GTest::gtest_main + LIBRARIES Core GTest::gtest GTest::gtest_main ) ROOTTEST_ADD_TEST(TestCustomModelsFromONNX EXEC ./TestCustomModelsFromONNX FIXTURES_REQUIRED sofie-compile-models-onnx) + + if (clad) + ROOT_EXECUTABLE(TestCladAutodiff TestCladAutodiff.cxx + LIBRARIES Core GTest::gtest GTest::gtest_main + ) + ROOTTEST_ADD_TEST(TestCladAutodiff + EXEC ./TestCladAutodiff + FIXTURES_REQUIRED sofie-compile-models-onnx) + endif() endif() # For testing
serialisation of RModel object diff --git a/tmva/sofie/test/TestCladAutodiff.cxx b/tmva/sofie/test/TestCladAutodiff.cxx new file mode 100644 index 0000000000000..bdd040e4ea03f --- /dev/null +++ b/tmva/sofie/test/TestCladAutodiff.cxx @@ -0,0 +1,111 @@ +constexpr auto modelHeaderSuffix = "_FromONNX.hxx"; +constexpr auto modelDataSuffix = "_FromONNX.dat"; +#include "test_helpers.h" + +#include "input_models/references/Linear_16.ref.hxx" + +#include "gtest/gtest.h" + +// Test differentiating a fully-connected neural network with Clad. +// Extension of the ONNX.Linear16 test in TestCustomModelsFromONNX.cxx +TEST(ONNXClad, Linear16) +{ + constexpr float TOLERANCE = DEFAULT_TOLERANCE; + + // Preparing the standard all-ones input + std::vector input(1600); + std::fill_n(input.data(), input.size(), 1.0f); + + ASSERT_INCLUDE_AND_RUN(std::vector, "Linear_16", input); + + gInterpreter->Declare(R"( +#include + +float Linear_16_wrapper(TMVA_SOFIE_Linear_16::Session const &session, float const *input) +{ + float out[160]{}; + float output_sum = 0.0; + + TMVA_SOFIE_Linear_16::doInfer(session, input, out); + + for (std::size_t i = 0; i < std::size(out); ++i) { + output_sum += out[i]; + } + return output_sum; +} + +float Linear_16_outer_wrapper(TMVA_SOFIE_Linear_16::Session const &session, float const *input) +{ + return Linear_16_wrapper(session, input); +} + +float Linear_16_wrapper_num_diff(TMVA_SOFIE_Linear_16::Session const &session, float *input, std::size_t i) +{ + const float origVal = input[i]; + + const float eps = 1e-3; + input[i] = origVal - eps; + float funcValDown = Linear_16_wrapper(session, input); + input[i] = origVal + eps; + float funcValUp = Linear_16_wrapper(session, input); + input[i] = origVal; + + return (funcValUp - funcValDown) / (2 * eps); +} + )"); + + auto inputInterp = toInterpreter(input, "std::vector", true); + + // Why do we have two wrappers, the <>_wrapper and the <>_outer_wrapper? 
+ // This is because we are not interested in the created gradient function. + // We are interested in the more low-level *pullback* function, which takes + // also the data structures for the reverse pass as function arguments. Like + // this, we can initialize the session for the backward pass once and re-use + // it. The trick to get the wrapper pullback is to create another wrapper + // around the wrapper, and creating the gradient for the outer wrapper + // implicitly creates the pullback for the inner wrapper. + gInterpreter->ProcessLine("clad::gradient(Linear_16_outer_wrapper, \"input\");"); + + // Create two session data structures: one for the forward, and one for the backward pass + gInterpreter->ProcessLine("TMVA_SOFIE_Linear_16::Session session_linear_16{\"Linear_16_FromONNX.dat\"};"); + gInterpreter->ProcessLine("TMVA_SOFIE_Linear_16::Session _d_session_linear_16{\"Linear_16_FromONNX.dat\"};"); + + gInterpreter->ProcessLine("float grad_output[1600]{};"); + gInterpreter->ProcessLine( + ("Linear_16_wrapper_pullback(session_linear_16, " + inputInterp + ", 1, &_d_session_linear_16, grad_output)") + .c_str()); + + // If you want to see the gradient code: + // gInterpreter->ProcessLine("static_cast(Linear_16_outer_wrapper_grad_1)"); gInterpreter->ProcessLine("Linear_16_wrapper_pullback"); + // gInterpreter->ProcessLine("TMVA_SOFIE_Linear_16::doInfer_reverse_forw"); + // gInterpreter->ProcessLine("TMVA_SOFIE_Linear_16::doInfer_pullback"); + + auto retVal = gInterpreter->ProcessLine((R"( + double maxDiff = 0; + for (std::size_t i = 0; i < std::size(grad_output); ++i) { + double val = grad_output[i]; + double ref = Linear_16_wrapper_num_diff(session_linear_16, )" + + inputInterp + R"(, i); + if (val != ref) { + maxDiff = std::max(std::abs(val - ref), maxDiff); + } + } + double tol = 0.0025; + // the "return" value + (maxDiff < tol); + )") + .c_str()); + + EXPECT_EQ(retVal, 1) << "The gradient from Clad and the numeric gradient didn't match within tolerance."; + 
+ // Checking output size + EXPECT_EQ(output.size(), sizeof(Linear_16_ExpectedOutput::all_ones) / sizeof(float)); + + float *correct = Linear_16_ExpectedOutput::all_ones; + + // Checking every output value, one by one + for (size_t i = 0; i < output.size(); ++i) { + EXPECT_LE(std::abs(output[i] - correct[i]), TOLERANCE); + } +}