Skip to content

Commit 5753bd1

Browse files
committed
Remove the ::kernels component from the torchao::kernels::cpu namespace path
1 parent 8806b02 commit 5753bd1

File tree

62 files changed

+207
-208
lines changed

Some content is hidden

Large commits have some content hidden by default. Use the search box below to find content that may be hidden.

62 files changed

+207
-208
lines changed

torchao/csrc/cpu/shared_kernels/benchmarks/benchmark_linear_8bit_act_xbit_weight.cpp

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -18,7 +18,7 @@ template <int weight_nbit, bool has_weight_zeros, bool has_bias, bool has_clamp>
1818
UKernelConfig get_ukernel_config() {
1919
UKernelConfig config;
2020

21-
namespace ukernel = torchao::kernels::cpu::aarch64::linear::
21+
namespace ukernel = torchao::cpu::aarch64::linear::
2222
channelwise_8bit_activation_groupwise_lowbit_weight_1x8x16_f32_neondot;
2323
config.mr = 1;
2424
config.nr = 8;

torchao/csrc/cpu/shared_kernels/embedding_xbit/op_embedding_xbit-impl.h

Lines changed: 3 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -133,7 +133,7 @@ Tensor embedding_out_cpu(
133133
}
134134
TORCHAO_CHECK(index >= 0 && index < num_embeddings, "index out of bounds");
135135
#if defined(TORCHAO_BUILD_CPU_AARCH64)
136-
torchao::kernels::cpu::aarch64::embedding::embedding<weight_nbit>(
136+
torchao::cpu::aarch64::embedding::embedding<weight_nbit>(
137137
out.mutable_data_ptr<float>() + idx * embedding_dim,
138138
embedding_dim,
139139
group_size,
@@ -199,7 +199,7 @@ Tensor pack_embedding_cpu(const Tensor& weight_qvals) {
199199

200200
torchao::parallel_1d(0, num_embeddings, [&](int64_t idx) {
201201
#if defined(TORCHAO_BUILD_CPU_AARCH64)
202-
torchao::kernels::cpu::aarch64::embedding::pack_embedding_weight_qvals<
202+
torchao::cpu::aarch64::embedding::pack_embedding_weight_qvals<
203203
weight_nbit>(
204204
out.mutable_data_ptr<int8_t>() +
205205
torchao::ops::PackedWeightsHeader::size(),
@@ -289,7 +289,7 @@ Tensor shared_embedding_out_cpu(
289289
}
290290
TORCHAO_CHECK(index >= 0 && index < n, "index out of bounds");
291291
#if defined(TORCHAO_BUILD_CPU_AARCH64)
292-
torchao::kernels::cpu::aarch64::embedding::
292+
torchao::cpu::aarch64::embedding::
293293
shared_embedding<weight_nbit, nr, kr, sr>(
294294
out.mutable_data_ptr<float>() + idx * k,
295295
packed_weights.const_data_ptr<int8_t>() +

torchao/csrc/cpu/shared_kernels/groupwise_lowbit_weight_lut/kernel_selector.h

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -117,7 +117,7 @@ void register_ukernel_config(
117117
int preferred_alignment = 16;
118118

119119
namespace kernel_api =
120-
torchao::kernels::cpu::aarch64::linear::groupwise_lowbit_weight_lut;
120+
torchao::cpu::aarch64::linear::groupwise_lowbit_weight_lut;
121121

122122
using kernel_fn_ptr_t =
123123
decltype(&kernel_api::groupwise_lowbit_weight_lut_kernel_1x4x32<

torchao/csrc/cpu/shared_kernels/linear_8bit_act_xbit_weight/kernel_selector.h

Lines changed: 4 additions & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -97,7 +97,7 @@ void register_ukernel_config_universal(
9797
torchao::ops::PackedWeightsType::linear_8bit_act_xbit_weight_universal,
9898
weight_nbit);
9999

100-
namespace kernel = torchao::kernels::cpu::aarch64::linear::
100+
namespace kernel = torchao::cpu::aarch64::linear::
101101
channelwise_8bit_activation_groupwise_lowbit_weight;
102102

103103
constexpr bool has_lut = false;
@@ -181,7 +181,7 @@ void register_ukernel_config_lut(
181181
int preferred_alignment = 16;
182182

183183
#if defined(TORCHAO_ENABLE_ARM_NEON_DOT)
184-
namespace kernel = torchao::kernels::cpu::aarch64::linear::
184+
namespace kernel = torchao::cpu::aarch64::linear::
185185
channelwise_8bit_activation_groupwise_lowbit_weight;
186186

187187
if (!cpuinfo_has_arm_neon_dot()) {
@@ -232,7 +232,7 @@ void register_ukernel_config_lut(
232232
template <typename kernel_struct>
233233
UKernelConfig::linear_config_type
234234
get_linear_config_kleidi(int n_step, int nr, int kr, int sr) {
235-
namespace op = torchao::kernels::cpu::aarch64::kleidi::
235+
namespace op = torchao::cpu::aarch64::kleidi::
236236
kai_matmul_clamp_f32_qai8dxp_qsi4c32p;
237237
assert(n_step == kernel_struct::get_ukernel().get_n_step());
238238
assert(nr == kernel_struct::get_ukernel().get_nr());
@@ -256,7 +256,7 @@ void register_ukernel_config_kleidi(
256256
throw std::runtime_error("Failed to initialize cpuinfo!");
257257
}
258258
check_format(format, torchao::ops::PackedWeightsType::linear_8bit_act_xbit_weight_kleidi_ai, weight_nbit);
259-
namespace op = torchao::kernels::cpu::aarch64::kleidi::
259+
namespace op = torchao::cpu::aarch64::kleidi::
260260
kai_matmul_clamp_f32_qai8dxp_qsi4c32p;
261261

262262
auto uk = UKernelConfig::make(

torchao/csrc/cpu/shared_kernels/tests/test_groupwise_lowbit_weight_lut.cpp

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -19,7 +19,7 @@ using namespace torchao::ops::groupwise_lowbit_weight_lut;
1919
template <int weight_nbit, bool has_scales>
2020
UKernelConfig get_ukernel_config(bool has_bias) {
2121
namespace kernel =
22-
torchao::kernels::cpu::aarch64::linear::groupwise_lowbit_weight_lut;
22+
torchao::cpu::aarch64::linear::groupwise_lowbit_weight_lut;
2323

2424
int preferred_alignment = 16;
2525
int n_step = 8;

torchao/csrc/cpu/shared_kernels/tests/test_linear_8bit_act_xbit_weight.cpp

Lines changed: 3 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -16,7 +16,7 @@
1616

1717
#if defined(TORCHAO_ENABLE_KLEIDI)
1818
#include <torchao/csrc/cpu/torch_free_kernels/aarch64/kleidi/kai_matmul_clamp_f32_qai8dxp_qsi4c32p.h>
19-
using namespace torchao::kernels::cpu::aarch64::kleidi::
19+
using namespace torchao::cpu::aarch64::kleidi::
2020
kai_matmul_clamp_f32_qai8dxp_qsi4c32p;
2121
#endif // TORCHAO_ENABLE_KLEIDI
2222

@@ -27,7 +27,7 @@ using namespace torchao::ops::linear_8bit_act_xbit_weight;
2727

2828
template <int weight_nbit, bool has_weight_zeros, bool has_bias, bool has_clamp, bool has_lut = false>
2929
UKernelConfig get_ukernel_config() {
30-
namespace kernel = torchao::kernels::cpu::aarch64::linear::
30+
namespace kernel = torchao::cpu::aarch64::linear::
3131
channelwise_8bit_activation_groupwise_lowbit_weight;
3232

3333
int preferred_alignment = 16;
@@ -213,7 +213,7 @@ enum kai_kernel_id {
213213

214214
template <typename kernel_struct>
215215
UKernelConfig get_ukernel_config_kleidi_impl() {
216-
namespace op = torchao::kernels::cpu::aarch64::kleidi::
216+
namespace op = torchao::cpu::aarch64::kleidi::
217217
kai_matmul_clamp_f32_qai8dxp_qsi4c32p;
218218

219219
auto uk = kernel_struct::get_ukernel();

torchao/csrc/cpu/torch_free_kernels/aarch64/benchmarks/benchmark_linear.cpp

Lines changed: 3 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -19,7 +19,7 @@ channelwise_8bit_activation_groupwise_lowbit_weight_1x1x32_f32_neondot(
1919
int k = state.range(2);
2020
int group_size = state.range(3);
2121

22-
using namespace torchao::kernels::cpu::aarch64::linear::
22+
using namespace torchao::cpu::aarch64::linear::
2323
channelwise_8bit_activation_groupwise_lowbit_weight_1x1x32_f32_neondot;
2424

2525
auto test_case = torchao::
@@ -91,7 +91,7 @@ channelwise_8bit_activation_groupwise_lowbit_weight_1x4x16_f32_neondot(
9191
int k = state.range(2);
9292
int group_size = state.range(3);
9393

94-
using namespace torchao::kernels::cpu::aarch64::linear::
94+
using namespace torchao::cpu::aarch64::linear::
9595
channelwise_8bit_activation_groupwise_lowbit_weight;
9696

9797
auto test_case = torchao::
@@ -163,7 +163,7 @@ channelwise_8bit_activation_groupwise_lowbit_weight_1x8x16_f32_neondot(
163163
int k = state.range(2);
164164
int group_size = state.range(3);
165165

166-
using namespace torchao::kernels::cpu::aarch64::linear::
166+
using namespace torchao::cpu::aarch64::linear::
167167
channelwise_8bit_activation_groupwise_lowbit_weight;
168168

169169
auto test_case = torchao::

torchao/csrc/cpu/torch_free_kernels/aarch64/benchmarks/benchmark_quantization.cpp

Lines changed: 2 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -21,7 +21,7 @@ static void benchmark_quantize(benchmark::State& state) {
2121
float vmin, vmax, scale;
2222

2323
for (auto _ : state) {
24-
torchao::kernels::cpu::aarch64::reduction::find_min_and_max(
24+
torchao::cpu::aarch64::reduction::find_min_and_max(
2525
vmin, vmax, vals.data(), vals.size());
2626

2727
torchao::quantization::get_qvals_range(
@@ -30,7 +30,7 @@ static void benchmark_quantize(benchmark::State& state) {
3030
torchao::quantization::get_scale_and_zero(
3131
scale, zero, vmin, vmax, qmin, qmax);
3232

33-
torchao::kernels::cpu::aarch64::quantization::quantize(
33+
torchao::cpu::aarch64::quantization::quantize(
3434
qvals.data(), vals.data(), vals.size(), scale, zero, qmin, qmax);
3535
}
3636
}

torchao/csrc/cpu/torch_free_kernels/aarch64/embedding/embedding.h

Lines changed: 3 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -15,7 +15,7 @@
1515
#include <cassert>
1616
#include <vector>
1717

18-
namespace torchao::kernels::cpu::aarch64::embedding {
18+
namespace torchao::cpu::aarch64::embedding {
1919

2020
namespace internal {
2121

@@ -353,7 +353,7 @@ inline void shared_embedding(
353353
n_idx = n_idx * nr;
354354
int j = index - n_idx;
355355

356-
torchao::kernels::cpu::aarch64::linear::
356+
torchao::cpu::aarch64::linear::
357357
channelwise_8bit_activation_groupwise_lowbit_weight::weight_packing::
358358
unpack_weights_at_n_idx<weight_nbit, nr, kr, sr>(
359359
weight_qvals.data(),
@@ -381,6 +381,6 @@ inline void shared_embedding(
381381
}
382382
}
383383

384-
} // namespace torchao::kernels::cpu::aarch64::embedding
384+
} // namespace torchao::cpu::aarch64::embedding
385385

386386
#endif // defined(__aarch64__) || defined(__ARM_NEON)

torchao/csrc/cpu/torch_free_kernels/aarch64/embedding/embedding_lut.h

Lines changed: 2 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -14,7 +14,7 @@
1414
#include <cassert>
1515
#include <vector>
1616

17-
namespace torchao::kernels::cpu::aarch64::embedding {
17+
namespace torchao::cpu::aarch64::embedding {
1818

1919
/**
2020
* @brief Calculates the size in bytes for a single row of packed embeddings.
@@ -377,6 +377,6 @@ inline void dequantize_embedding_row_at_idx_lut(
377377
vst1q_f32(out + j + 12, out3);
378378
}
379379
}
380-
} // namespace torchao::kernels::cpu::aarch64::embedding
380+
} // namespace torchao::cpu::aarch64::embedding
381381

382382
#endif // defined(__aarch64__) || defined(__ARM_NEON)

0 commit comments

Comments
 (0)