
Commit 6412aff

[slim tensor migration 1/n] introduce slimtensor required c10 functions
This stack aims to migrate SlimTensor into the ExecuTorch stack so that it can serve as the internal tensor representation of the CUDA backend. This diff introduces the c10 dependencies that SlimTensor requires into ExecuTorch by copying the c10 headers SlimTensor needs but that are not yet present in the ExecuTorch stack. Note that to unblock SlimTensor first, this diff only copies the required c10 files as-is; it does not keep them identical to the current c10 in PyTorch. We will sync them with the latest c10 and move them into `executorch/runtime/core/portable_type/c10/c10/` once the SlimTensor migration is done.

Differential Revision: [D89417354](https://our.internmc.facebook.com/intern/diff/D89417354/)

ghstack-source-id: 330099391

Pull Request resolved: #16304
1 parent 12ace93 commit 6412aff

File tree

6 files changed: +945 −0 lines changed

Lines changed: 162 additions & 0 deletions
@@ -0,0 +1,162 @@
/*
 * Copyright (c) Meta Platforms, Inc. and affiliates.
 * All rights reserved.
 *
 * This source code is licensed under the BSD-style license found in the
 * LICENSE file in the root directory of this source tree.
 */

#pragma once

#include <c10/util/irange.h>
#include <executorch/runtime/core/array_ref.h>

#include <algorithm>
#include <cstdint>
#include <vector>

namespace c10 {

using ::executorch::runtime::ArrayRef;

template <typename T>
bool _compute_contiguous(ArrayRef<T> sizes, ArrayRef<T> strides, T numel) {
  if (numel == 0) {
    return true;
  }

  T expected_stride = 1;
  // NB: make sure we do signed arithmetic
  for (int64_t d = int64_t(sizes.size()) - 1; d >= 0; d--) {
    const auto& size_d = sizes[d];
    if (size_d == 1) {
      continue;
    }

    if (strides[d] != expected_stride) {
      return false;
    }
    expected_stride *= size_d;
  }
  return true;
}

// This function will return true if the tensor is contiguous, and false if
// it's not or if we can't determine whether it is contiguous due to unbacked
// symbols (it could be either in that case based on the actual runtime data).
template <typename T>
bool definitely_contiguous(ArrayRef<T> sizes, ArrayRef<T> strides, T numel) {
  if (numel == 0) {
    return true;
  }

  T expected_stride = 1;
  // NB: make sure we do signed arithmetic
  for (int64_t d = int64_t(sizes.size()) - 1; d >= 0; d--) {
    const auto& size_d = sizes[d];
    if (size_d == 1) {
      continue;
    }

    if (strides[d] != expected_stride) {
      return false;
    }
    expected_stride *= size_d;
  }
  return true;
}

template <typename T>
bool _compute_channels_last_contiguous_2d(
    ArrayRef<T> sizes,
    ArrayRef<T> strides) {
  // Please don't combine these cases; the constant array is used here to let
  // the compiler fully unroll the loop and get better performance.
  switch (sizes.size()) {
    case 4: {
      T expected = 1;
      for (auto& d : {1, 3, 2, 0}) {
        const auto& size_d = sizes[d];
        if (size_d != 1) {
          if (strides[d] != expected) {
            return false;
          }
          expected *= size_d;
        }
      }
      return true;
    }
    // NOLINTNEXTLINE(bugprone-branch-clone)
    case 3:
      // TODO: the dim == 3 case will be enabled once it is fully tested
      return false;
    default:
      return false;
  }
}

template <typename T>
bool _compute_channels_last_contiguous_3d(
    ArrayRef<T> sizes,
    ArrayRef<T> strides) {
  // Please don't combine these cases; the constant array is used here to let
  // the compiler fully unroll the loop and get better performance.
  switch (sizes.size()) {
    case 5: {
      T expected = 1;
      for (auto& d : {1, 4, 3, 2, 0}) {
        const auto& size_d = sizes[d];
        if (size_d != 1) {
          if (strides[d] != expected) {
            return false;
          }
          expected *= size_d;
        }
      }
      return true;
    }
    // NOLINTNEXTLINE(bugprone-branch-clone)
    case 4:
      // TODO: the dim == 4 case will be enabled once it is fully tested
      return false;
    default:
      return false;
  }
}

template <typename T>
bool _compute_non_overlapping_and_dense(
    ArrayRef<T> sizes,
    ArrayRef<T> strides) {
  auto dim = sizes.size();
  if (dim == 1) {
    return sizes[0] < 2 || strides[0] == 1;
  }
  std::vector<int64_t> perm(dim);
  for (const auto i : c10::irange(dim)) {
    perm[i] = i;
  }
  // Sort by strides, leaving 0 and 1 sized dims at the end of the array
  std::sort(perm.begin(), perm.end(), [&](int64_t a, int64_t b) {
    if (sizes[a] < 2) {
      return false;
    } else if (sizes[b] < 2) {
      return true;
    }
    return strides[a] < strides[b];
  });
  T require_stride = 1;
  for (const auto i : c10::irange(dim)) {
    const auto& size_perm_i = sizes[perm[i]];
    if (size_perm_i < 2) {
      return true;
    }
    if (strides[perm[i]] != require_stride) {
      return false;
    }
    require_stride *= size_perm_i;
  }
  return true;
}

} // namespace c10
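
For context, here is a hypothetical usage sketch (not part of the commit) exercising the helpers above. It assumes the new header is on the include path and included, and that `executorch::runtime::ArrayRef` (re-exported here as `c10::ArrayRef`) can be constructed from a pointer and a length; the sizes, strides, and the `main` function are illustrative only.

// Hypothetical usage sketch, not part of the diff: check a row-major 2x3
// tensor, a transposed view of it, and a channels-last NCHW tensor.
// Assumes the header above has already been included.
#include <cassert>
#include <cstdint>

int main() {
  const int64_t sizes[] = {2, 3};
  const int64_t row_major_strides[] = {3, 1}; // contiguous row-major layout
  const int64_t transposed_strides[] = {1, 2}; // 2x3 view of a contiguous 3x2 buffer

  c10::ArrayRef<int64_t> s(sizes, 2);
  c10::ArrayRef<int64_t> rm(row_major_strides, 2);
  c10::ArrayRef<int64_t> tr(transposed_strides, 2);

  // Row-major 2x3: strides match the expected {3, 1}, so it is contiguous.
  assert(c10::_compute_contiguous<int64_t>(s, rm, /*numel=*/6));
  // The transposed view has strides {1, 2}; dim 1 no longer has stride 1,
  // so it is not contiguous...
  assert(!c10::_compute_contiguous<int64_t>(s, tr, /*numel=*/6));
  // ...but it is still non-overlapping and dense (a permuted dense layout).
  assert(c10::_compute_non_overlapping_and_dense<int64_t>(s, tr));

  // Channels-last (NHWC) strides for an N=2, C=3, H=4, W=5 tensor.
  const int64_t cl_sizes[] = {2, 3, 4, 5};
  const int64_t cl_strides[] = {60, 1, 15, 3};
  assert(c10::_compute_channels_last_contiguous_2d<int64_t>(
      c10::ArrayRef<int64_t>(cl_sizes, 4),
      c10::ArrayRef<int64_t>(cl_strides, 4)));
  return 0;
}

The assertions just restate what each helper computes: `_compute_contiguous` walks dims from innermost to outermost checking strides against the running product of sizes, while `_compute_non_overlapping_and_dense` accepts any permutation of a dense layout.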
