@@ -6,39 +6,102 @@ namespace infinilm::layers {
// ---------------------------------------------------------
// QKV Parallel Linear
// ---------------------------------------------------------
/**
 * @deprecated This overload is deprecated and will be REMOVED in the next major release (v0.2.0).
 *
 * ⚠️ DEVELOPMENT POLICY:
 * - NO new development or feature additions permitted on this interface
 * - Only critical bug fixes (security/stability) allowed until removal
 * - All new code MUST migrate to the quantization-aware overload below
 *
 * Replacement: use the overloaded constructor of the same name that additionally
 *              takes an infinicore::nn::QuantScheme parameter.
 * Reason: the legacy signature lacks support for dynamic quantization modes.
 * Removal target: v0.2.0 (Q2 2026)
 */
921QKVParallelLinear::QKVParallelLinear (size_t hidden_size,
1022 size_t head_dim,
1123 size_t num_q_head,
1224 size_t num_kv_head,
1325 bool bias,
1426 const infinicore::DataType &dtype,
1527 const infinicore::Device &device,
16- engine::distributed::RankInfo rank_info,
17- std::optional<infinicore::nn::QuantScheme> quant_scheme)
28+ engine::distributed::RankInfo rank_info)
1829 : QKVParallelLinear(hidden_size,
1930 head_dim, head_dim, head_dim,
2031 num_q_head, num_kv_head, num_kv_head,
2132 bias, bias, bias,
22- dtype, device, rank_info,
23- quant_scheme) {}
33+ dtype, device, rank_info) {}
2434
2535QKVParallelLinear::QKVParallelLinear (size_t hidden_size,
2636 size_t q_dim, size_t k_dim, size_t v_dim,
2737 size_t num_q_head, size_t num_k_head, size_t num_v_head,
2838 bool q_bias, bool k_bias, bool v_bias,
2939 const infinicore::DataType &dtype,
3040 const infinicore::Device &device,
31- engine::distributed::RankInfo rank_info,
32- std::optional<infinicore::nn::QuantScheme> quant_scheme)
41+ engine::distributed::RankInfo rank_info)
3342 : infinicore::nn::ColumnParallelLinear(
3443 hidden_size,
3544 num_q_head * q_dim + num_k_head * k_dim + num_v_head * v_dim,
3645 (q_bias || k_bias || v_bias),
3746 dtype,
3847 device,
3948 rank_info.tp_rank,
40- rank_info.tp_size,
41- quant_scheme),
49+ rank_info.tp_size),
50+ q_dim_ (q_dim),
51+ k_dim_(k_dim),
52+ v_dim_(v_dim),
53+ num_q_head_(num_q_head),
54+ num_k_head_(num_k_head),
55+ num_v_head_(num_v_head),
56+ q_bias_(q_bias),
57+ k_bias_(k_bias),
58+ v_bias_(v_bias) {
59+ if (num_q_head % tp_size_ != 0 || num_k_head % tp_size_ != 0 || num_v_head % tp_size_ != 0 ) {
60+ throw std::runtime_error (" QKVParallelLinear: num_[q|k|v]_head must be divisible by tp_size" );
61+ }
62+
63+ if ((q_bias_ != k_bias_) || (k_bias_ != v_bias_)) {
64+ throw std::runtime_error (" q_bias, k_bias, v_bias must all match" );
65+ }
66+
67+ q_out_size_ = num_q_head_ * q_dim_ / tp_size_;
68+ k_out_size_ = num_k_head_ * k_dim_ / tp_size_;
69+ v_out_size_ = num_v_head_ * v_dim_ / tp_size_;
70+ }
71+
72+ QKVParallelLinear::QKVParallelLinear (size_t hidden_size,
73+ size_t head_dim,
74+ size_t num_q_head,
75+ size_t num_kv_head,
76+ infinicore::nn::QuantScheme quant_scheme,
77+ bool bias,
78+ const infinicore::DataType &dtype,
79+ const infinicore::Device &device,
80+ engine::distributed::RankInfo rank_info)
81+ : QKVParallelLinear(hidden_size,
82+ head_dim, head_dim, head_dim,
83+ num_q_head, num_kv_head, num_kv_head,
84+ bias, bias, bias,
85+ quant_scheme,
86+ dtype, device, rank_info) {}
87+
88+ QKVParallelLinear::QKVParallelLinear (size_t hidden_size,
89+ size_t q_dim, size_t k_dim, size_t v_dim,
90+ size_t num_q_head, size_t num_k_head, size_t num_v_head,
91+ bool q_bias, bool k_bias, bool v_bias,
92+ infinicore::nn::QuantScheme quant_scheme,
93+ const infinicore::DataType &dtype,
94+ const infinicore::Device &device,
95+ engine::distributed::RankInfo rank_info)
96+ : infinicore::nn::ColumnParallelLinear(
97+ hidden_size,
98+ num_q_head * q_dim + num_k_head * k_dim + num_v_head * v_dim,
99+ quant_scheme,
100+ (q_bias || k_bias || v_bias),
101+ dtype,
102+ device,
103+ rank_info.tp_rank,
104+ rank_info.tp_size),
42105 q_dim_(q_dim),
43106 k_dim_(k_dim),
44107 v_dim_(v_dim),
@@ -141,18 +204,44 @@ bool QKVParallelLinear::has_v_bias() const { return v_bias_; }
// ---------------------------------------------------------
// Gate-Up Parallel Linear
// ---------------------------------------------------------
/**
 * @deprecated This overload is deprecated and will be REMOVED in the next major release (v0.2.0).
 *
 * ⚠️ DEVELOPMENT POLICY:
 * - NO new development or feature additions permitted on this interface
 * - Only critical bug fixes (security/stability) allowed until removal
 * - All new code MUST migrate to the quantization-aware overload below
 *
 * Replacement: use the overloaded constructor of the same name that additionally
 *              takes an infinicore::nn::QuantScheme parameter.
 * Reason: the legacy signature lacks support for dynamic quantization modes.
 * Removal target: v0.2.0 (Q2 2026)
 */
144219GateUpParallelLinear::GateUpParallelLinear (size_t hidden_size, size_t intermediate_size, bool bias,
145220 const infinicore::DataType &dtype, const infinicore::Device &device,
146- engine::distributed::RankInfo rank_info,
147- std::optional<infinicore::nn::QuantScheme> quant_scheme)
148- : GateUpParallelLinear(hidden_size, intermediate_size, bias, bias, dtype, device, rank_info, quant_scheme) {
221+ engine::distributed::RankInfo rank_info)
222+ : GateUpParallelLinear(hidden_size, intermediate_size, bias, bias, dtype, device, rank_info) {
223+ }
224+
225+ GateUpParallelLinear::GateUpParallelLinear (size_t hidden_size, size_t intermediate_size, bool gate_bias, bool up_bias,
226+ const infinicore::DataType &dtype, const infinicore::Device &device,
227+ engine::distributed::RankInfo rank_info)
228+ : infinicore::nn::ColumnParallelLinear(hidden_size, intermediate_size * 2 , gate_bias || up_bias, dtype, device, rank_info.tp_rank, rank_info.tp_size), gate_bias_(gate_bias), up_bias_(up_bias) {
229+ if (gate_bias_ != up_bias_) {
230+ throw std::runtime_error (" Not supported yet: gate_bias and up_bias should be given at the same time" );
231+ }
232+ }
233+
234+ GateUpParallelLinear::GateUpParallelLinear (size_t hidden_size, size_t intermediate_size, infinicore::nn::QuantScheme quant_scheme, bool bias,
235+ const infinicore::DataType &dtype, const infinicore::Device &device,
236+ engine::distributed::RankInfo rank_info)
237+ : GateUpParallelLinear(hidden_size, intermediate_size, bias, bias, quant_scheme, dtype, device, rank_info) {
149238}
150239
151240GateUpParallelLinear::GateUpParallelLinear (size_t hidden_size, size_t intermediate_size, bool gate_bias, bool up_bias,
241+ infinicore::nn::QuantScheme quant_scheme,
152242 const infinicore::DataType &dtype, const infinicore::Device &device,
153- engine::distributed::RankInfo rank_info,
154- std::optional<infinicore::nn::QuantScheme> quant_scheme)
155- : infinicore::nn::ColumnParallelLinear(hidden_size, intermediate_size * 2 , gate_bias || up_bias, dtype, device, rank_info.tp_rank, rank_info.tp_size, quant_scheme), gate_bias_(gate_bias), up_bias_(up_bias) {
243+ engine::distributed::RankInfo rank_info)
244+ : infinicore::nn::ColumnParallelLinear(hidden_size, intermediate_size * 2 , quant_scheme, gate_bias || up_bias, dtype, device, rank_info.tp_rank, rank_info.tp_size), gate_bias_(gate_bias), up_bias_(up_bias) {
156245 if (gate_bias_ != up_bias_) {
157246 throw std::runtime_error (" Not supported yet: gate_bias and up_bias should be given at the same time" );
158247 }
0 commit comments