diff --git a/tmva/sofie/inc/TMVA/RModel.hxx b/tmva/sofie/inc/TMVA/RModel.hxx index bc6493090f74e..3395de6d34166 100644 --- a/tmva/sofie/inc/TMVA/RModel.hxx +++ b/tmva/sofie/inc/TMVA/RModel.hxx @@ -35,8 +35,6 @@ private: std::vector fOutputTensorNames; std::vector fInputTensorNames; // input tensor names using ONNX order - - std::vector> fOperators; std::vector> fSubGraphs; /// CollectTensorMemberNames(const std::string &input); public: const std::vector & GetInputTensorNames() const { return fInputTensorNames; } diff --git a/tmva/sofie/inc/TMVA/ROperator.hxx b/tmva/sofie/inc/TMVA/ROperator.hxx index 6fac7958f8f9d..f0afd9c4374c1 100644 --- a/tmva/sofie/inc/TMVA/ROperator.hxx +++ b/tmva/sofie/inc/TMVA/ROperator.hxx @@ -25,9 +25,7 @@ public: virtual std::vector TypeInference(std::vector) { return {}; }; virtual void Initialize(RModel&) = 0; virtual std::string Generate(std::string OpName) = 0; //expect unique opName for each operator within the same RModel - // generate code for Session constructor before tensor allocation - virtual std::string GenerateSessionCtorCode() { return "";} - // generate initialization code for session constructor after tensor allocations + // generate initialization code for session constructor virtual std::string GenerateInitCode() { return "";} // generate some specific declaration code for Session virtual std::string GenerateDeclCode() { return "";} diff --git a/tmva/sofie/inc/TMVA/ROperator_Gemm.hxx b/tmva/sofie/inc/TMVA/ROperator_Gemm.hxx index ecdd0b435fe37..83381baa39f0c 100644 --- a/tmva/sofie/inc/TMVA/ROperator_Gemm.hxx +++ b/tmva/sofie/inc/TMVA/ROperator_Gemm.hxx @@ -393,9 +393,12 @@ namespace SOFIE{ << (fAttrTransB ? "true, " : "false, ") << (fAttrTransA ? 
"true, " : "false, ") << n << ", " << m << ", " << k << ", "; - out << std::setprecision(std::numeric_limits::max_digits10) << fAttrAlpha << ", tensor_" << fNB; + // TODO: the cast to (float *) is not needed here from the C++ language perspective (the arguments to + // Gemm_Call are const already), but Clad bug https://github.com/vgvassilev/clad/issues/1721 is requiring + // us to do this cast to keep Clad working. Remove this hack once the Clad issue is fixed. + out << std::setprecision(std::numeric_limits::max_digits10) << fAttrAlpha << ", (float*)tensor_" << fNB; if (extraB) out << " + " << opName << "_B_offset"; - out << ", tensor_" << fNA; + out << ", (float*)tensor_" << fNA; // TODO: same here if (extraA) out << " + " << opName << "_A_offset"; out << ", " << std::setprecision(std::numeric_limits::max_digits10) << fAttrBeta << ","; // in the case of bias and no broadcasting needed diff --git a/tmva/sofie/inc/TMVA/ROperator_LSTM.hxx b/tmva/sofie/inc/TMVA/ROperator_LSTM.hxx index 84f37bc57da7e..ae0ee70c4eeea 100644 --- a/tmva/sofie/inc/TMVA/ROperator_LSTM.hxx +++ b/tmva/sofie/inc/TMVA/ROperator_LSTM.hxx @@ -390,45 +390,62 @@ std::string ROperator_LSTM::GenerateSessionMembersCode(std::string opName) size_t batch_size = (fAttrLayout == 0) ? 
fShapeX[1] : fShapeX[0]; size_t input_size = fShapeX[2]; + struct Block { + std::string name; + size_t size; + }; + + std::vector blocks; + + size_t ff_size = seq_length * batch_size * fAttrHiddenSize; + size_t hs_size = seq_length * num_directions * batch_size * fAttrHiddenSize; + + // Layout-dependent buffers if (fAttrLayout != 0) { - out << "std::vector<" << fType << "> fVec_" << opName << "_input = std::vector<" << fType << ">(" - << seq_length * batch_size * input_size << ");\n"; - out << "std::vector<" << fType << "> fVec_" << opName << "_initial_hidden_state = std::vector<" << fType << ">(" - << num_directions * batch_size * fAttrHiddenSize << ");\n"; - out << "std::vector<" << fType << "> fVec_" << opName << "_initial_cell_state = std::vector<" << fType << ">(" - << num_directions * batch_size * fAttrHiddenSize << ");\n"; + blocks.push_back({"input", seq_length * batch_size * input_size}); + blocks.push_back({"initial_hidden_state", num_directions * batch_size * fAttrHiddenSize}); + blocks.push_back({"initial_cell_state", num_directions * batch_size * fAttrHiddenSize}); } - // Set the feedforward - size_t ff_size = seq_length * batch_size * fAttrHiddenSize; - out << "std::vector<" << fType << "> fVec_" << opName << "_ff_input_gate = std::vector<" << fType << ">(" << ff_size - << ");\n"; - out << "std::vector<" << fType << "> fVec_" << opName << "_ff_output_gate = std::vector<" << fType << ">(" << ff_size - << ");\n"; - out << "std::vector<" << fType << "> fVec_" << opName << "_ff_cell_gate = std::vector<" << fType << ">(" << ff_size - << ");\n"; + + // Feedforward gates + blocks.push_back({"ff_input_gate", ff_size}); + blocks.push_back({"ff_output_gate", ff_size}); + blocks.push_back({"ff_cell_gate", ff_size}); if (fAttrInputForget == 0) - out << "std::vector<" << fType << "> fVec_" << opName << "_ff_forget_gate = std::vector<" << fType << ">(" - << ff_size << ");\n"; - // gate results - size_t hs_size = seq_length * num_directions * batch_size * 
fAttrHiddenSize; - out << "std::vector<" << fType << "> fVec_" << opName << "_input_gate = std::vector<" << fType << ">(" << hs_size - << ");\n"; - out << "std::vector<" << fType << "> fVec_" << opName << "_output_gate = std::vector<" << fType << ">(" << hs_size - << ");\n"; - out << "std::vector<" << fType << "> fVec_" << opName << "_cell_gate = std::vector<" << fType << ">(" << hs_size - << ");\n"; + blocks.push_back({"ff_forget_gate", ff_size}); + + // Gate outputs + blocks.push_back({"input_gate", hs_size}); + blocks.push_back({"output_gate", hs_size}); + blocks.push_back({"cell_gate", hs_size}); if (fAttrInputForget == 0) - out << "std::vector<" << fType << "> fVec_" << opName << "_forget_gate = std::vector<" << fType << ">(" << hs_size - << ");\n"; - // cell state - out << "std::vector<" << fType << "> fVec_" << opName << "_cell_state = std::vector<" << fType << ">(" << hs_size - << ");\n"; - out << "std::vector<" << fType << "> fVec_" << opName << "_new_cell_state = std::vector<" << fType << ">(" << hs_size - << ");\n"; - // hiddden state + blocks.push_back({"forget_gate", hs_size}); + + // Cell state + blocks.push_back({"cell_state", hs_size}); + blocks.push_back({"new_cell_state", hs_size}); + + // Hidden state (conditional) if (fAttrLayout != 0 || fNY.empty()) { - out << "std::vector<" << fType << "> fVec_" << opName << "_hidden_state = std::vector<" << fType << ">(" - << hs_size << ");\n"; + blocks.push_back({"hidden_state", hs_size}); + } + + // Compute total size + size_t total_size = 0; + for (const auto &b : blocks) { + total_size += b.size; + } + + // Backing storage + out << "std::vector<" << fType << "> fVec_" << opName << "_buffer = std::vector<" << fType << ">(" << total_size + << ");\n"; + + // Emit pointers + std::size_t offset = 0; + for (const auto &b : blocks) { + out << fType << "* fVec_" << opName << "_" << b.name << " = fVec_" << opName << "_buffer.data() + " << offset + << ";\n"; + offset += b.size; } out << "\n"; @@ -452,7 +469,7 @@ 
auto ROperator_LSTM::Generate(std::string OpName) -> std::string out << SP << fType << " const *" << OpName << "_input = tensor_" << fNX << ";\n"; } else { if (fUseSession) - out << SP << fType << " * " << OpName << "_input = this->fVec_" << OpName << "_input.data();\n"; + out << SP << fType << " * " << OpName << "_input = this->fVec_" << OpName << "_input;\n"; else out << SP << fType << " " << OpName << "_input[" << seq_length * batch_size * input_size << "] = {0};\n"; @@ -470,11 +487,11 @@ auto ROperator_LSTM::Generate(std::string OpName) -> std::string // Set the initial hidden state if (!fNInitial_h.empty()) { if (fAttrLayout == 0) { - out << SP << fType << " *" << OpName << "_initial_hidden_state = " << " tensor_" << fNInitial_h << ";\n"; + out << SP << fType << " const*" << OpName << "_initial_hidden_state = " << " tensor_" << fNInitial_h << ";\n"; } else { if (fUseSession) - out << SP << fType << " * " << OpName << "_initial_hidden_state = this->fVec_" << OpName - << "_initial_hidden_state.data();\n"; + out << SP << fType << " const* " << OpName << "_initial_hidden_state = this->fVec_" << OpName + << "_initial_hidden_state;\n"; else out << SP << fType << " " << OpName << "_initial_hidden_state[" << num_directions * batch_size * fAttrHiddenSize << "] = {0};\n"; @@ -494,11 +511,11 @@ auto ROperator_LSTM::Generate(std::string OpName) -> std::string // Set the initial cell state if (!fNInitial_c.empty()) { if (fAttrLayout == 0) { - out << SP << fType << " *" << OpName << "_initial_cell_state = " << " tensor_" << fNInitial_c << ";\n"; + out << SP << fType << " const*" << OpName << "_initial_cell_state = " << " tensor_" << fNInitial_c << ";\n"; } else { if (fUseSession) - out << SP << fType << " * " << OpName << "_initial_cell_state = this->fVec_" << OpName - << "_initial_cell_state.data();\n"; + out << SP << fType << " const* " << OpName << "_initial_cell_state = this->fVec_" << OpName + << "_initial_cell_state;\n"; else out << SP << fType << " " << OpName << 
"_initial_cell_state[" << num_directions * batch_size * fAttrHiddenSize << "] = {0};\n"; @@ -518,12 +535,12 @@ auto ROperator_LSTM::Generate(std::string OpName) -> std::string // Set the feedforward size_t ff_size = seq_length * batch_size * fAttrHiddenSize; if (fUseSession) { - out << SP << fType << " * " << OpName << "_ff_input_gate = this->fVec_" << OpName << "_ff_input_gate.data();\n"; - out << SP << fType << " * " << OpName << "_ff_output_gate = this->fVec_" << OpName << "_ff_output_gate.data();\n"; - out << SP << fType << " * " << OpName << "_ff_cell_gate = this->fVec_" << OpName << "_ff_cell_gate.data();\n"; + out << SP << fType << " * " << OpName << "_ff_input_gate = this->fVec_" << OpName << "_ff_input_gate;\n"; + out << SP << fType << " * " << OpName << "_ff_output_gate = this->fVec_" << OpName << "_ff_output_gate;\n"; + out << SP << fType << " * " << OpName << "_ff_cell_gate = this->fVec_" << OpName << "_ff_cell_gate;\n"; if (fAttrInputForget == 0) { out << SP << fType << " * " << OpName << "_ff_forget_gate = this->fVec_" << OpName - << "_ff_forget_gate.data();\n"; + << "_ff_forget_gate;\n"; } } else { out << SP << fType << " " << OpName << "_ff_input_gate[" << ff_size << "] = {0};\n"; @@ -536,11 +553,11 @@ auto ROperator_LSTM::Generate(std::string OpName) -> std::string // Set the gates size_t hidden_state_size = seq_length * num_directions * batch_size * fAttrHiddenSize; if (fUseSession) { - out << SP << fType << " * " << OpName << "_input_gate = this->fVec_" << OpName << "_input_gate.data();\n"; - out << SP << fType << " * " << OpName << "_output_gate = this->fVec_" << OpName << "_output_gate.data();\n"; - out << SP << fType << " * " << OpName << "_cell_gate = this->fVec_" << OpName << "_cell_gate.data();\n"; + out << SP << fType << " * " << OpName << "_input_gate = this->fVec_" << OpName << "_input_gate;\n"; + out << SP << fType << " * " << OpName << "_output_gate = this->fVec_" << OpName << "_output_gate;\n"; + out << SP << fType << " * " << OpName 
<< "_cell_gate = this->fVec_" << OpName << "_cell_gate;\n"; if (fAttrInputForget == 0) { - out << SP << fType << " * " << OpName << "_forget_gate = this->fVec_" << OpName << "_forget_gate.data();\n"; + out << SP << fType << " * " << OpName << "_forget_gate = this->fVec_" << OpName << "_forget_gate;\n"; } } else { out << SP << fType << " " << OpName << "_input_gate[" << hidden_state_size << "] = {0};\n"; @@ -552,8 +569,8 @@ auto ROperator_LSTM::Generate(std::string OpName) -> std::string } // Set the cell state and the new cell state = h(cell state) if (fUseSession) { - out << SP << fType << " * " << OpName << "_cell_state = this->fVec_" << OpName << "_cell_state.data();\n"; - out << SP << fType << " * " << OpName << "_new_cell_state = this->fVec_" << OpName << "_new_cell_state.data();\n"; + out << SP << fType << " * " << OpName << "_cell_state = this->fVec_" << OpName << "_cell_state;\n"; + out << SP << fType << " * " << OpName << "_new_cell_state = this->fVec_" << OpName << "_new_cell_state;\n"; } else { out << SP << fType << " " << OpName << "_cell_state[" << hidden_state_size << "] = {0};\n"; out << SP << fType << " " << OpName << "_new_cell_state[" << hidden_state_size << "] = {0};\n"; @@ -564,7 +581,7 @@ auto ROperator_LSTM::Generate(std::string OpName) -> std::string out << SP << fType << " *" << OpName << "_hidden_state = tensor_" << fNY << ";\n"; } else { if (fUseSession) { - out << SP << fType << " * " << OpName << "_hidden_state = this->fVec_" << OpName << "_hidden_state.data();\n"; + out << SP << fType << " * " << OpName << "_hidden_state = this->fVec_" << OpName << "_hidden_state;\n"; } else { out << SP << fType << " " << OpName << "_hidden_state[" << hidden_state_size << "] = {0};\n"; } diff --git a/tmva/sofie/inc/TMVA/ROperator_NonZero.hxx b/tmva/sofie/inc/TMVA/ROperator_NonZero.hxx index fdf04665e0315..8587035f8d44b 100644 --- a/tmva/sofie/inc/TMVA/ROperator_NonZero.hxx +++ b/tmva/sofie/inc/TMVA/ROperator_NonZero.hxx @@ -92,8 +92,8 @@ public: 
fShapeY.resize(2); fShapeY[0] = fShapeX.size(); - // flag -1 to define the shape variable in the constructor code and not in the constructor signature - fShapeY[1] = Dim{std::string("v_NonZero_") + fNX, static_cast(-1) }; + // identify as -1 since we will declare maximum as size of input + fShapeY[1] = Dim{std::string("v_NonZero_") + fNX, static_cast(-1)}; model.AddIntermediateTensor(fNY, ETensorType::INT64, fShapeY); if (model.Verbose()) { @@ -101,16 +101,16 @@ public: } } } - std::string GenerateSessionCtorCode() override { + + std::string GenerateSessionMembersCode(std::string /*opName*/) override { if (fIsOutputConstant) return ""; // define output value used as max non zero with max size = input shape * N auto inputLength = ConvertDimShapeToLength(fShapeX); std::stringstream out; - out << SP << "size_t v_NonZero_" << fNX << " = " << inputLength << ";\n"; + out << SP << "size_t fV_NonZero_" << fNX << " = " << inputLength << ";\n"; return out.str(); } - std::string Generate(std::string opName) override { if (fIsOutputConstant) { return ""; diff --git a/tmva/sofie/inc/TMVA/ROperator_RNN.hxx b/tmva/sofie/inc/TMVA/ROperator_RNN.hxx index 0667047eed228..f385a502d4077 100644 --- a/tmva/sofie/inc/TMVA/ROperator_RNN.hxx +++ b/tmva/sofie/inc/TMVA/ROperator_RNN.hxx @@ -308,18 +308,38 @@ std::string ROperator_RNN::GenerateSessionMembersCode(std::string opName) size_t batch_size = (fAttrLayout == 0) ? 
fShapeX[1] : fShapeX[0]; size_t input_size = fShapeX[2]; + struct Block { + std::string name; + size_t size; + }; + + std::vector blocks; + if (fAttrLayout != 0) { - out << "std::vector<" << fType << "> fVec_" << opName << "_input = std::vector<" << fType << ">(" - << seq_length * batch_size * input_size << ");\n"; - out << "std::vector<" << fType << "> fVec_" << opName << "_initial_hidden_state = std::vector<" << fType << ">(" - << num_directions * batch_size * fAttrHiddenSize << ");\n"; + blocks.push_back({"input", seq_length * batch_size * input_size}); + blocks.push_back({"initial_hidden_state", num_directions * batch_size * fAttrHiddenSize}); } - out << "std::vector<" << fType << "> fVec_" << opName << "_feedforward = std::vector<" << fType << ">(" - << seq_length * batch_size * fAttrHiddenSize << ");\n"; - + blocks.push_back({"feedforward", seq_length * batch_size * fAttrHiddenSize}); if (fAttrLayout != 0 || fNY.empty()) { - out << "std::vector<" << fType << "> fVec_" << opName << "_hidden_state = std::vector<" << fType << ">(" - << seq_length * num_directions * batch_size * fAttrHiddenSize << ");\n"; + blocks.push_back({"hidden_state", seq_length * num_directions * batch_size * fAttrHiddenSize}); + } + + // Compute total size + size_t total_size = 0; + for (const auto &b : blocks) { + total_size += b.size; + } + + // Emit backing storage + out << "std::vector<" << fType << "> fVec_" << opName << "_buffer = std::vector<" << fType << ">(" << total_size + << ");\n"; + + // Emit pointers + std::size_t offset = 0; + for (const auto &b : blocks) { + out << fType << "* fVec_" << opName << "_" << b.name << " = fVec_" << opName << "_buffer.data() + " << offset + << ";\n"; + offset += b.size; } out << "\n"; @@ -346,7 +366,7 @@ auto ROperator_RNN::Generate(std::string OpName) -> std::string } } else { if (fUseSession) - out << SP << fType << " * " << OpName << "_input = this->fVec_" << OpName << "_input.data();\n"; + out << SP << fType << " * " << OpName << "_input = 
this->fVec_" << OpName << "_input;\n"; else out << SP << fType << " " << OpName << "_input[" << seq_length * batch_size * input_size << "];\n"; out << SP << "for(size_t seq = 0; seq < " << seq_length << "; seq++) {\n"; @@ -367,7 +387,7 @@ auto ROperator_RNN::Generate(std::string OpName) -> std::string } else { if (fUseSession) out << SP << fType << " * " << OpName << "_initial_hidden_state = this->fVec_" << OpName - << "_initial_hidden_state.data();\n"; + << "_initial_hidden_state;\n"; else out << fType << " " << OpName << "_initial_hidden_state[" << num_directions * batch_size * fAttrHiddenSize << "] = {0};\n"; @@ -385,7 +405,7 @@ auto ROperator_RNN::Generate(std::string OpName) -> std::string } if (fUseSession) - out << SP << fType << " * " << OpName << "_feedforward = this->fVec_" << OpName << "_feedforward.data();\n"; + out << SP << fType << " * " << OpName << "_feedforward = this->fVec_" << OpName << "_feedforward;\n"; else out << SP << fType << " " << OpName << "_feedforward[" << seq_length * batch_size * fAttrHiddenSize << "] = {0};\n"; @@ -395,7 +415,7 @@ auto ROperator_RNN::Generate(std::string OpName) -> std::string out << SP << fType << " *" << OpName << "_hidden_state = tensor_" << fNY << ";\n"; } else { if (fUseSession) - out << SP << fType << " * " << OpName << "_hidden_state = this->fVec_" << OpName << "_hidden_state.data();\n"; + out << SP << fType << " * " << OpName << "_hidden_state = this->fVec_" << OpName << "_hidden_state;\n"; else out << SP << fType << " " << OpName << "_hidden_state[" << seq_length * num_directions * batch_size * fAttrHiddenSize << "] = {0};\n"; diff --git a/tmva/sofie/inc/TMVA/SOFIE_common.hxx b/tmva/sofie/inc/TMVA/SOFIE_common.hxx index fddc07a85fc08..8769193080b39 100644 --- a/tmva/sofie/inc/TMVA/SOFIE_common.hxx +++ b/tmva/sofie/inc/TMVA/SOFIE_common.hxx @@ -681,16 +681,6 @@ void col2im(const Dtype* data_col, const int channels, //std::cout << "finishing col2imp" << std::endl; } -// Used at the end of infer() to fill the 
return object. -template -void FillOutput(T const *arr, std::vector &out, std::size_t n) -{ - out.resize(n); - for (std::size_t i = 0; i < n; ++i) { - out[i] = arr[i]; - } -} - } // end namespace UTILITY namespace BLAS{ diff --git a/tmva/sofie/src/RModel.cxx b/tmva/sofie/src/RModel.cxx index 0e6a7d7cf9bbf..74e672779fcf5 100644 --- a/tmva/sofie/src/RModel.cxx +++ b/tmva/sofie/src/RModel.cxx @@ -11,14 +11,34 @@ #include "TMVA/RModel.hxx" #include "TMVA/SOFIE_common.hxx" -namespace TMVA { -namespace Experimental { -namespace SOFIE { +namespace TMVA::Experimental::SOFIE { namespace { + const std::string SP = " "; + +void ReplaceAll(std::string &str, const std::string &from, const std::string &to) +{ + size_t pos = 0; + while ((pos = str.find(from, pos)) != std::string::npos) { + str.replace(pos, from.length(), to); + pos += to.length(); + } +} + +bool IsIdentifierChar(char c) +{ + return std::isalnum(static_cast(c)) || c == '_'; } +// Get the data member name corresponding to a tensor with a given name. 
+std::string TensorMember(std::string const &name) +{ + return "tensor_" + name; +} + +} // namespace + std::underlying_type_t operator|(Options opA, Options opB) { return static_cast>(opA) | static_cast>(opB); } @@ -26,6 +46,7 @@ std::underlying_type_t operator|(std::underlying_type_t opA, O return opA | static_cast>(opB); } + std::vector RModel::GetTensorShape(const std::string & name) const { auto f = fReadyInputTensorInfos.find(name); if (f != fReadyInputTensorInfos.end()) { @@ -356,7 +377,7 @@ std::string RModel::AllocateIntermediateMemory(std::span std::string typeName = ConvertTypeToString(GetTensorType(name)); code << "\n // Allocating memory for intermediate tensor " << name << " with size " << size << " bytes"; code << "\n" - << typeName << "* tensor_" << name << " = reinterpret_cast<" << typeName + << typeName << "* " << TensorMember(name) << " = reinterpret_cast<" << typeName << "*>(fIntermediateMemoryPool.data() + " << location << ");\n"; }; @@ -714,7 +735,8 @@ std::string GenerateConstantTensorCode(const std::pair fTensor_" << t.first << " = "; if (sameData) @@ -722,7 +744,7 @@ std::string GenerateConstantTensorCode(const std::pair(i); - fConstantTensorSize += ConvertShapeToLength(i.second.shape()) * 4; + fConstantTensorSize += length * sizeof(float); } else if (i.second.type() == ETensorType::INT64) { fGC += GenerateConstantTensorCode(i); - fConstantTensorSize += ConvertShapeToLength(i.second.shape()) * 8; + fConstantTensorSize += length * sizeof(int64_t); } } else { // case of tensors which are read from a file - size_t length = ConvertShapeToLength(i.second.shape()); if (i.second.type() == ETensorType::FLOAT) { fGC += "std::vector fTensor_" + i.first + " = std::vector(" + std::to_string(length) + ");\n"; - fGC += "float * tensor_" + i.first + " = fTensor_" + i.first + ".data();\n"; - fWeightsTensorSize += ConvertShapeToLength(i.second.shape()) * 4; + fGC += "float * " + TensorMember(i.first) + " = fTensor_" + i.first + ".data();\n"; + 
fWeightsTensorSize += length * sizeof(float); } } } @@ -774,7 +796,7 @@ void RModel::GenerateIntermediateTensorInfo() { bool is_alias = (IsAliasTensor(i.first)); if (i.second.type == ETensorType::BOOL && !is_alias) { tensor_declaration_block += "std::vector fTensor_" + i.first + " = std::vector(" + std::to_string(ConvertShapeToLength(i.second.shape)) + ");\n"; - tensor_declaration_block += "std::uint8_t * tensor_" + i.first + " = fTensor_" + i.first + ".data();\n"; + tensor_declaration_block += "std::uint8_t * " + TensorMember(i.first) + " = fTensor_" + i.first + ".data();\n"; continue; } bool is_extended = (fOptimizationLevel == OptimizationLevel::kExtended); @@ -788,22 +810,22 @@ void RModel::GenerateIntermediateTensorInfo() { if (i.second.type == ETensorType::FLOAT) { tensor_declaration_block += "std::vector fTensor_" + i.first + " = std::vector(" + std::to_string(length) + ");\n"; - tensor_declaration_block += "float * tensor_" + i.first + " = fTensor_" + i.first + ".data();\n"; + tensor_declaration_block += "float * " + TensorMember(i.first) + " = fTensor_" + i.first + ".data();\n"; fOtherTensorSize += 4 * length; } else if (i.second.type == ETensorType::DOUBLE) { tensor_declaration_block += "std::vector fTensor_" + i.first + " = std::vector(" + std::to_string(length) + ");\n"; - tensor_declaration_block += "double * tensor_" + i.first + " = fTensor_" + i.first + ".data();\n"; + tensor_declaration_block += "double * " + TensorMember(i.first) + " = fTensor_" + i.first + ".data();\n"; fOtherTensorSize += 8 * length; } else if (i.second.type == ETensorType::INT64) { tensor_declaration_block += "std::vector fTensor_" + i.first + " = std::vector(" + std::to_string(length) + ");\n"; - tensor_declaration_block += "int64_t * tensor_" + i.first + " = fTensor_" + i.first + ".data();\n"; + tensor_declaration_block += "int64_t * " + TensorMember(i.first) + " = fTensor_" + i.first + ".data();\n"; fOtherTensorSize += 8 * length; } } if (is_alias) { - 
tensor_declaration_block += ConvertTypeToString(i.second.type) + " * tensor_" + i.first + " = nullptr;\n"; + tensor_declaration_block += ConvertTypeToString(i.second.type) + " * " + TensorMember(i.first) + " = nullptr;\n"; } } @@ -816,7 +838,7 @@ void RModel::GenerateIntermediateTensorInfo() { if (!fDynamicTensorInfos.empty()) { fGC += "//--- declare the dynamic tensors\n"; for (auto &i : fDynamicTensorInfos) { - fGC += ConvertTypeToString(i.second.type) + " * tensor_" + i.first + " = nullptr;\n"; + fGC += ConvertTypeToString(i.second.type) + " * " + TensorMember(i.first) + " = nullptr;\n"; } fGC += "//--- dynamic tensors pool\n"; fGC += "std::vector fDynamicMemoryPool;\n"; @@ -862,9 +884,10 @@ void RModel::GenerateDynamicTensorInfo() auto op_ptr = op.get(); std::cout << "Looping on operator " << op_index << " " << typeid(*op_ptr).name() << std::endl; } - // check if is a dynamic tensor and not an alias tensor + // check if is a dynamic tensor and not an alias tensor or output tensor std::string name = std::string(it); - if ( fDynamicTensorInfos.find(name) != fDynamicTensorInfos.end() && !IsAliasTensor(name)) { + if ( fDynamicTensorInfos.find(name) != fDynamicTensorInfos.end() && !IsAliasTensor(name) + && std::find(fOutputTensorNames.begin(), fOutputTensorNames.end(), name) == fOutputTensorNames.end()) { auto tensor_size = ConvertDimShapeToLength(GetDimTensorShape(name)); auto type = GetTensorType(name); size_t type_size = GetTypeSize(type); @@ -901,6 +924,7 @@ void RModel::GenerateDynamicTensorInfo() bool missingTensor = false; for (auto &i : fDynamicTensorInfos) { if (IsAliasTensor(i.first)) continue; + if (std::find(fOutputTensorNames.begin(), fOutputTensorNames.end(), i.first) != fOutputTensorNames.end()) continue; if (std::find(tensors.begin(), tensors.end(), std::pair{i.first, i.second.type}) == tensors.end()) { std::cout << "Dynamic tensors " << i.first << " is not in list of operator input/output " << std::endl; missingTensor = true; @@ -912,6 +936,83 @@ 
void RModel::GenerateDynamicTensorInfo() fGC += out.str(); } +/// Check if a given parameter is used for the shape of an input tensor. +bool RModel::IsInputTensorShapeParam(std::string const ¶mName) const +{ + for (auto &name : fInputTensorNames) { + if (IsDimInputTensor(name)) { + auto shape = GetDynamicTensorShape(name); + for (auto &d : shape) { + if (d.param == paramName) + return true; + } + } + } + return false; +} + +/// Collects all identifiers starting with "tensor_" in the input code, +/// provided that the occurrence is not immediately preceded by a +/// character that is valid in a C++ identifier. Excludes input and output tensor names. +/// Returns a deduplicated std::vector. +std::vector RModel::CollectTensorMemberNames(const std::string &input) +{ + const std::string target = "tensor_"; + + std::vector result; + + for (size_t i = 0; i < input.size();) { + + bool doCollect = false; + + if (i + target.size() <= input.size() && input.compare(i, target.size(), target) == 0 && + (i == 0 || !IsIdentifierChar(input[i - 1]))) { + + doCollect = true; + + std::size_t j = i + target.size(); + + // Extend to full identifier + while (j < input.size() && IsIdentifierChar(input[j])) + ++j; + + std::string fullName = input.substr(i, j - i); + + // Exclude input tensor names + for (std::string const &name : fInputTensorNames) { + if (fullName == target + name) { + doCollect = false; + break; + } + } + + // Exclude output tensor names + if (doCollect) { + for (std::string const &name : fOutputTensorNames) { + if (fullName == target + name) { + doCollect = false; + break; + } + } + } + + if (doCollect) { + result.push_back(fullName); + } + + i = j; // advance past the identifier + } else { + ++i; + } + } + + // Deduplicate (order not preserved) + std::sort(result.begin(), result.end()); + result.erase(std::unique(result.begin(), result.end()), result.end()); + + return result; +} + std::string RModel::GenerateInferSignature(bool isdecl) { // generate the infer 
signature given the inputs: eg. "float * tensor1, float * tensor2" // if (decl = false) generate only calling signature (tensor1,tensor2,....) @@ -1004,8 +1105,24 @@ void RModel::GenerateOutput() if (!doInferArgs.empty()) doInferArgs += ","; for (std::string const &name : fOutputTensorNames) { - fGC += SP + "std::vector<" + typeForOutput(GetTensorType(name)) + " > output_tensor_" + name + ";\n"; - doInferArgs += " output_tensor_" + name + ","; + bool isDynamic = fDynamicTensorInfos.count(name) > 0; + std::string n; + if(!isDynamic) { + n = std::to_string(ConvertShapeToLength(GetTensorShape(name))); + } else { + n = memberNameForDimShape(ConvertDimShapeToLength(GetDynamicTensorShape(name))); + } + std::string outputName = "output_tensor_" + name; + fGC += SP + "std::vector<" + typeForOutput(GetTensorType(name)) + " > " + outputName + "(" + n + ");\n"; + doInferArgs += " " + outputName + ".data(),"; + if(isDynamic) { + for (auto const &dim : GetDynamicTensorShape(name)) { + if (dim.isParam && !IsInputTensorShapeParam(dim.param)) { + fGC += SP + "size_t " + dim.param + " = 0;\n"; + doInferArgs += " " + dim.param + ","; + } + } + } } if (!doInferArgs.empty()) doInferArgs.back() = ' '; @@ -1031,7 +1148,21 @@ void RModel::GenerateOutput() } } - fGC += SP + "doInfer(" + doInferArgs + ");\n"; + if (fUseSession) { + fGC += SP + "doInfer(*this, " + doInferArgs + ");\n"; + } else { + fGC += SP + "doInfer(" + doInferArgs + ");\n"; + } + + // If the output tensors have dynamic sizes, now is the time to set them + for (std::string const &name : fOutputTensorNames) { + bool isDynamic = fDynamicTensorInfos.count(name) > 0; + if (isDynamic) { + std::string outputName = "output_tensor_" + name; + auto tensor_size = ConvertDimShapeToLength(GetDimTensorShape(name)); + fGC += " " + outputName + ".resize(" + tensor_size + ");\n"; + } + } fGC += SP + "return {"; for (size_t i = 0; i < fOutputTensorNames.size(); i++) { @@ -1045,23 +1176,43 @@ void RModel::GenerateOutput() void 
RModel::GenerateSessionCode() { + std::string sessionName = !fIsSubGraph ? "Session" : "Session_" + fName; + + if (fUseSession && !fIsGNNComponent) { + // forward declare session struct + fGC += "struct " + sessionName + ";\n"; + } + // Determine the signature of the actual inference function std::string doInferSignature = GenerateInferSignature(); if (!doInferSignature.empty()) doInferSignature += ", "; for (auto const &name : fOutputTensorNames) { - doInferSignature += " std::vector<" + typeForOutput(GetTensorType(name)) + "> &output_tensor_" + name + ","; + bool isDynamic = fDynamicTensorInfos.count(name) > 0; + doInferSignature += typeForOutput(GetTensorType(name)) + " *tensor_" + name + ","; + if(isDynamic) { + for (auto const &dim : GetDynamicTensorShape(name)) { + if (dim.isParam && !IsInputTensorShapeParam(dim.param)) + doInferSignature += " size_t &" + dim.param + "_output,"; + } + } } doInferSignature.back() = ' '; - doInferSignature = "void doInfer(" + doInferSignature + ")"; + if (fUseSession) { + doInferSignature = sessionName + " const &session, " + doInferSignature; + } + + doInferSignature = "inline void doInfer(" + doInferSignature + ")"; + + if (!fIsGNNComponent) { + // forward declare inference implementation + fGC += doInferSignature + ";\n"; + } // define the Session struct (for GNN this is generated in RModel_GNN) if (fUseSession && !fIsGNNComponent) { - if (!fIsSubGraph) - fGC += "struct Session {\n"; - else - fGC += "struct Session_" + fName + " {\n"; + fGC += "struct " + sessionName + " {\n"; } // generate code for declaring the initialized tensors @@ -1115,9 +1266,6 @@ void RModel::GenerateSessionCode() // Generate code for Session constructor if (fUseSession) { - std::string sessionName = "Session"; - if (fIsSubGraph) - sessionName += "_" + fName; // add here specific operator code that needs to define session data members fGC += "\n"; for (size_t id = 0; id < fOperators.size(); id++) { @@ -1152,11 +1300,6 @@ void 
RModel::GenerateSessionCode() } fGC += ") {\n"; - // add some code required in session constructor - for (size_t id = 0; id < fOperators.size(); id++) { - fGC += fOperators[id]->GenerateSessionCtorCode(); - } - // initializing dynamic parameters if (!fDimShapeNames.empty()) { fGC += "\n\n"; @@ -1184,7 +1327,15 @@ void RModel::GenerateSessionCode() fGC += "}\n\n"; } - fGC += doInferSignature + "{\n"; + // generate the inference overload that returns an output struct + GenerateOutput(); + + // end of session + if (fUseSession && !fIsGNNComponent) { + fGC += "}; // end of Session\n\n"; + } + + fGC += doInferSignature + " {\n"; fGC += "\n"; // generate the inference code @@ -1194,32 +1345,47 @@ void RModel::GenerateSessionCode() if (fOutputTensorNames.size() == 0) throw std::runtime_error("TMVA-SOFIE: output size=0 are not supported"); + std::string allOperatorCode; + for (size_t op_idx = 0; op_idx < fOperators.size(); ++op_idx) { if (fVerbose) std::cout << "Generating code for operator .... " << op_idx << std::endl; - fGC += (fOperators[op_idx]->Generate(std::to_string(op_idx))); + std::string operatorCode = fOperators[op_idx]->Generate(std::to_string(op_idx)); + allOperatorCode += operatorCode; } - fGC += SP + "using TMVA::Experimental::SOFIE::UTILITY::FillOutput;\n\n"; + // If the generated code users members of the session struct, use the + // local variable name that we're using for the session: + ReplaceAll(allOperatorCode, "this->", "session."); - for (std::string const &name : fOutputTensorNames) { - // need to check is size is the same (don't want to return a vector with - // larger size) in that case better to copy - bool isIntermediate = fIntermediateTensorInfos.count(name) > 0; - std::string n = isIntermediate ? 
std::to_string(ConvertShapeToLength(GetTensorShape(name))) - : ConvertDimShapeToLength(GetDimTensorShape(name)); - fGC += SP + "FillOutput(tensor_" + name + ", output_tensor_" + name + ", " + n + ");\n"; + if (fUseSession && !fIsGNNComponent) { + // Collect all "tensor_*" data members that are not input or output tensors + std::vector tensorMemberNames = CollectTensorMemberNames(allOperatorCode); + for (auto const& name: tensorMemberNames) { + fGC += " auto &" + name + " = session." + name + ";\n"; + } + fGC += "\n"; } - fGC += "}\n\n"; - - // generate the inference overload that returns an output struct - GenerateOutput(); + fGC += allOperatorCode; - // end of session - if (fUseSession && !fIsGNNComponent) { - fGC += "}; // end of Session\n\n"; + for (auto const& name: fOutputTensorNames) { + bool isDynamic = fDynamicTensorInfos.count(name) > 0; + if(isDynamic) { + for (auto const &dim : GetDynamicTensorShape(name)) { + if (dim.isParam && !IsInputTensorShapeParam(dim.param)) + fGC += " " + dim.param + "_output = " + dim.param + ";\n"; + } + } + if(IsConstantTensor(name)) { + std::string t = "session.tensor_" + name; + size_t length = ConvertShapeToLength(fInitializedTensors[name].shape()); + fGC += " std::copy(" + t + ", " + t + " + " + std::to_string(length) + ", tensor_" + name + ");\n"; + } } + fGC += "\n"; + + fGC += "}\n"; } void RModel::Generate(std::underlying_type_t options, int batchSize, long pos, bool verbose) @@ -1653,6 +1819,4 @@ void RModel::Streamer(TBuffer &R__b) { } } -}//SOFIE -}//Experimental -}//TMVA +} // namespace TMVA::Experimental::SOFIE diff --git a/tmva/sofie/test/CMakeLists.txt b/tmva/sofie/test/CMakeLists.txt index 1a9295237cff1..120b5800b2f8d 100644 --- a/tmva/sofie/test/CMakeLists.txt +++ b/tmva/sofie/test/CMakeLists.txt @@ -48,14 +48,20 @@ ROOTTEST_ADD_TEST(SofieCompileModels_ONNX # Creating a Google Test if (BLAS_FOUND) # we need BLAS for compiling the models ROOT_EXECUTABLE(TestCustomModelsFromONNX TestCustomModelsFromONNX.cxx - 
LIBRARIES - Core - GTest::gtest - GTest::gtest_main + LIBRARIES Core GTest::gtest GTest::gtest_main ) ROOTTEST_ADD_TEST(TestCustomModelsFromONNX EXEC ./TestCustomModelsFromONNX FIXTURES_REQUIRED sofie-compile-models-onnx) + + if (clad) + ROOT_EXECUTABLE(TestCladAutodiff TestCladAutodiff.cxx + LIBRARIES Core GTest::gtest GTest::gtest_main + ) + ROOTTEST_ADD_TEST(TestCladAutodiff + EXEC ./TestCladAutodiff + FIXTURES_REQUIRED sofie-compile-models-onnx) + endif() endif() # For testing serialisation of RModel object diff --git a/tmva/sofie/test/TestCladAutodiff.cxx b/tmva/sofie/test/TestCladAutodiff.cxx new file mode 100644 index 0000000000000..bdd040e4ea03f --- /dev/null +++ b/tmva/sofie/test/TestCladAutodiff.cxx @@ -0,0 +1,111 @@ +constexpr auto modelHeaderSuffix = "_FromONNX.hxx"; +constexpr auto modelDataSuffix = "_FromONNX.dat"; +#include "test_helpers.h" + +#include "input_models/references/Linear_16.ref.hxx" + +#include "gtest/gtest.h" + +// Test differentiating a fully-connected neural network with Clad. 
+// Extension of the ONNX.Linear16 test in TestCustomModelsFromONNX.cxx +TEST(ONNXClad, Linear16) +{ + constexpr float TOLERANCE = DEFAULT_TOLERANCE; + + // Preparing the standard all-ones input + std::vector input(1600); + std::fill_n(input.data(), input.size(), 1.0f); + + ASSERT_INCLUDE_AND_RUN(std::vector, "Linear_16", input); + + gInterpreter->Declare(R"( +#include + +float Linear_16_wrapper(TMVA_SOFIE_Linear_16::Session const &session, float const *input) +{ + float out[160]{}; + float output_sum = 0.0; + + TMVA_SOFIE_Linear_16::doInfer(session, input, out); + + for (std::size_t i = 0; i < std::size(out); ++i) { + output_sum += out[i]; + } + return output_sum; +} + +float Linear_16_outer_wrapper(TMVA_SOFIE_Linear_16::Session const &session, float const *input) +{ + return Linear_16_wrapper(session, input); +} + +float Linear_16_wrapper_num_diff(TMVA_SOFIE_Linear_16::Session const &session, float *input, std::size_t i) +{ + const float origVal = input[i]; + + const float eps = 1e-3; + input[i] = origVal - eps; + float funcValDown = Linear_16_wrapper(session, input); + input[i] = origVal + eps; + float funcValUp = Linear_16_wrapper(session, input); + input[i] = origVal; + + return (funcValUp - funcValDown) / (2 * eps); +} + )"); + + auto inputInterp = toInterpreter(input, "std::vector", true); + + // Why do we have two wrappers, the <>_wrapper and the <>_outer_wrapper? + // This is because we are not interested in the created gradient function. + // We are interested in the more low-level *pullback* function, which takes + // also the data structures for the reverse pass as function arguments. Like + // this, we can initialize the session for the backward pass once and re-use + // it. The trick to get the wrapper pullback is to create another wrapper + // around the wrapper, and creating the gradient for the outer wrapper + // implicitly creates the pullback for the inner wrapper. 
+ gInterpreter->ProcessLine("clad::gradient(Linear_16_outer_wrapper, \"input\");"); + + // Create two session data structures: one for the forward, and one for the backward pass + gInterpreter->ProcessLine("TMVA_SOFIE_Linear_16::Session session_linear_16{\"Linear_16_FromONNX.dat\"};"); + gInterpreter->ProcessLine("TMVA_SOFIE_Linear_16::Session _d_session_linear_16{\"Linear_16_FromONNX.dat\"};"); + + gInterpreter->ProcessLine("float grad_output[1600]{};"); + gInterpreter->ProcessLine( + ("Linear_16_wrapper_pullback(session_linear_16, " + inputInterp + ", 1, &_d_session_linear_16, grad_output)") + .c_str()); + + // If you want to see the gradient code: + // gInterpreter->ProcessLine("static_cast(Linear_16_outer_wrapper_grad_1)"); gInterpreter->ProcessLine("Linear_16_wrapper_pullback"); + // gInterpreter->ProcessLine("TMVA_SOFIE_Linear_16::doInfer_reverse_forw"); + // gInterpreter->ProcessLine("TMVA_SOFIE_Linear_16::doInfer_pullback"); + + auto retVal = gInterpreter->ProcessLine((R"( + double maxDiff = 0; + for (std::size_t i = 0; i < std::size(grad_output); ++i) { + double val = grad_output[i]; + double ref = Linear_16_wrapper_num_diff(session_linear_16, )" + + inputInterp + R"(, i); + if (val != ref) { + maxDiff = std::max(std::abs(val - ref), maxDiff); + } + } + double tol = 0.0025; + // the "return" value + (maxDiff < tol); + )") + .c_str()); + + EXPECT_EQ(retVal, 1) << "The gradient from Clad and the numeric gradient didn't match within tolerance."; + + // Checking output size + EXPECT_EQ(output.size(), sizeof(Linear_16_ExpectedOutput::all_ones) / sizeof(float)); + + float *correct = Linear_16_ExpectedOutput::all_ones; + + // Checking every output value, one by one + for (size_t i = 0; i < output.size(); ++i) { + EXPECT_LE(std::abs(output[i] - correct[i]), TOLERANCE); + } +}