Skip to content

Commit ec3b3f4

Browse files
committed
Add device type validation in aoti_torch_create_tensor_from_blob_v2
Validate that the data pointer location matches the requested device_type. This prevents silent data corruption when tensors are created with a mismatched data pointer and device type.

Changes:
- Add cudaPointerGetAttributes check to verify data pointer location
- Error out if CPU data is provided but CUDA device is requested
- Error out if CUDA data is provided but CPU device is requested
- Add unit tests for device type mismatch scenarios

ghstack-source-id: ca29f52
ghstack-comment-id: 3676248526
Pull-Request: #16344
1 parent 89c6198 commit ec3b3f4

File tree

6 files changed

+399
-2
lines changed

6 files changed

+399
-2
lines changed

.github/workflows/cuda.yml

Lines changed: 26 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -87,6 +87,32 @@ jobs:
8787
export LD_LIBRARY_PATH=/opt/conda/lib:$LD_LIBRARY_PATH
8888
PYTHON_EXECUTABLE=python source .ci/scripts/test_model.sh "${{ matrix.model }}" cmake cuda
8989
90+
test-cuda-shims:
91+
name: test-cuda-shims
92+
uses: pytorch/test-infra/.github/workflows/linux_job_v2.yml@main
93+
permissions:
94+
id-token: write
95+
contents: read
96+
with:
97+
timeout: 90
98+
runner: linux.g5.4xlarge.nvidia.gpu
99+
gpu-arch-type: cuda
100+
gpu-arch-version: 12.6
101+
use-custom-docker-registry: false
102+
submodules: recursive
103+
ref: ${{ github.event_name == 'pull_request' && github.event.pull_request.head.sha || github.sha }}
104+
script: |
105+
set -eux
106+
107+
# Build ExecuTorch with CUDA support
108+
./install_executorch.sh
109+
110+
# Build and run CUDA shim tests
111+
pushd backends/cuda/runtime/shims/tests
112+
cmake --workflow --preset default
113+
ctest --preset default --output-on-failure
114+
popd
115+
90116
export-model-cuda-artifact:
91117
name: export-model-cuda-artifact
92118
# Skip this job if the pull request is from a fork (HuggingFace secrets are not available)

backends/cuda/runtime/shims/memory.cpp

Lines changed: 28 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -119,8 +119,6 @@ AOTITorchError aoti_torch_create_tensor_from_blob_v2(
119119
int32_t layout,
120120
const uint8_t* opaque_metadata,
121121
int64_t opaque_metadata_size) {
122-
// TODO(gasoonjia): verify given data is on the target device
123-
(void)device_type;
124122
(void)opaque_metadata;
125123
(void)layout;
126124
(void)opaque_metadata_size;
@@ -154,6 +152,34 @@ AOTITorchError aoti_torch_create_tensor_from_blob_v2(
154152
// Storage offset must be 0 since from_blob cannot handle different offsets
155153
ET_CHECK_OK_OR_RETURN_ERROR(validate_storage_offset(storage_offset));
156154

155+
// Verify that data pointer location matches the requested device_type
156+
cudaPointerAttributes data_attributes{};
157+
ET_CUDA_CHECK_OR_RETURN_ERROR(
158+
cudaPointerGetAttributes(&data_attributes, data));
159+
160+
bool data_is_on_device = data_attributes.type == cudaMemoryTypeDevice;
161+
bool data_is_on_host = data_attributes.type == cudaMemoryTypeHost ||
162+
data_attributes.type == cudaMemoryTypeUnregistered;
163+
bool requested_device =
164+
device_type == static_cast<int32_t>(SupportedDevices::CUDA);
165+
bool requested_cpu =
166+
device_type == static_cast<int32_t>(SupportedDevices::CPU);
167+
168+
// Error if data location doesn't match requested device type
169+
ET_CHECK_OR_RETURN_ERROR(
170+
!(data_is_on_device && requested_cpu),
171+
InvalidArgument,
172+
"aoti_torch_create_tensor_from_blob_v2 failed: data pointer %p is on CUDA "
173+
"but device_type is CPU. Data must be on CPU for CPU tensors.",
174+
data);
175+
176+
ET_CHECK_OR_RETURN_ERROR(
177+
!(data_is_on_host && requested_device),
178+
InvalidArgument,
179+
"aoti_torch_create_tensor_from_blob_v2 failed: data pointer %p is on CPU "
180+
"but device_type is CUDA. Data must be on GPU for CUDA tensors.",
181+
data);
182+
157183
// Convert sizes to the format expected by ExecutorTorch using SizesType
158184
std::vector<executorch::aten::SizesType> sizes =
159185
convert_sizes_to_vector(ndim, sizes_ptr);
Lines changed: 60 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,60 @@
1+
# Copyright (c) Meta Platforms, Inc. and affiliates.
2+
# All rights reserved.
3+
#
4+
# This source code is licensed under the BSD-style license found in the
5+
# LICENSE file in the root directory of this source tree.
6+
7+
cmake_minimum_required(VERSION 3.19)
8+
project(aoti_cuda_shim_tests LANGUAGES CXX CUDA)
9+
10+
set(CMAKE_CXX_STANDARD 17)
11+
set(CMAKE_CXX_STANDARD_REQUIRED ON)
12+
13+
# Find required packages
14+
find_package(CUDAToolkit REQUIRED)
15+
find_package(GTest REQUIRED)
16+
17+
# Get EXECUTORCH_ROOT
18+
if(NOT EXECUTORCH_ROOT)
19+
set(EXECUTORCH_ROOT ${CMAKE_CURRENT_SOURCE_DIR}/../../../../..)
20+
endif()
21+
22+
# Find installed ExecuTorch
23+
find_package(executorch CONFIG REQUIRED HINTS ${CMAKE_INSTALL_PREFIX})
24+
25+
# List of test files
26+
set(CUDA_SHIM_TESTS
27+
test_aoti_torch_create_tensor_from_blob_v2
28+
test_aoti_torch_empty_strided
29+
test_aoti_torch_delete_tensor_object
30+
test_aoti_torch__reinterpret_tensor
31+
test_aoti_torch_copy_
32+
test_aoti_torch_new_tensor_handle
33+
)
34+
35+
enable_testing()
36+
37+
foreach(test_name ${CUDA_SHIM_TESTS})
38+
add_executable(${test_name} ${test_name}.cpp)
39+
40+
target_include_directories(
41+
${test_name}
42+
PRIVATE ${EXECUTORCH_ROOT}
43+
${CUDAToolkit_INCLUDE_DIRS}
44+
)
45+
46+
target_link_libraries(
47+
${test_name}
48+
PRIVATE GTest::gtest
49+
GTest::gtest_main
50+
aoti_cuda_shims
51+
aoti_cuda_backend
52+
cuda_tensor_maker
53+
cuda_platform
54+
executorch_core
55+
extension_tensor
56+
CUDA::cudart
57+
)
58+
59+
add_test(NAME ${test_name} COMMAND ${test_name})
60+
endforeach()
Lines changed: 88 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,88 @@
1+
{
2+
"version": 6,
3+
"configurePresets": [
4+
{
5+
"name": "default",
6+
"displayName": "CUDA Shim Tests",
7+
"binaryDir": "${sourceDir}/../../../../../cmake-out/backends/cuda/runtime/shims/tests",
8+
"cacheVariables": {
9+
"CMAKE_BUILD_TYPE": "Release",
10+
"CMAKE_PREFIX_PATH": "${sourceDir}/../../../../../cmake-out"
11+
},
12+
"condition": {
13+
"type": "inList",
14+
"string": "${hostSystemName}",
15+
"list": ["Linux", "Windows"]
16+
}
17+
},
18+
{
19+
"name": "debug",
20+
"displayName": "CUDA Shim Tests (Debug)",
21+
"inherits": ["default"],
22+
"cacheVariables": {
23+
"CMAKE_BUILD_TYPE": "Debug"
24+
}
25+
}
26+
],
27+
"buildPresets": [
28+
{
29+
"name": "default",
30+
"displayName": "Build CUDA Shim Tests",
31+
"configurePreset": "default"
32+
},
33+
{
34+
"name": "debug",
35+
"displayName": "Build CUDA Shim Tests (Debug)",
36+
"configurePreset": "debug"
37+
}
38+
],
39+
"workflowPresets": [
40+
{
41+
"name": "default",
42+
"displayName": "Configure and build CUDA Shim Tests",
43+
"steps": [
44+
{
45+
"type": "configure",
46+
"name": "default"
47+
},
48+
{
49+
"type": "build",
50+
"name": "default"
51+
}
52+
]
53+
},
54+
{
55+
"name": "debug",
56+
"displayName": "Configure and build CUDA Shim Tests (Debug)",
57+
"steps": [
58+
{
59+
"type": "configure",
60+
"name": "debug"
61+
},
62+
{
63+
"type": "build",
64+
"name": "debug"
65+
}
66+
]
67+
}
68+
],
69+
"testPresets": [
70+
{
71+
"name": "default",
72+
"displayName": "Run all CUDA Shim Tests",
73+
"configurePreset": "default",
74+
"output": {
75+
"outputOnFailure": true
76+
}
77+
},
78+
{
79+
"name": "debug",
80+
"displayName": "Run all CUDA Shim Tests (Debug)",
81+
"configurePreset": "debug",
82+
"output": {
83+
"outputOnFailure": true
84+
}
85+
}
86+
]
87+
}
88+
Lines changed: 132 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,132 @@
1+
# CUDA AOTI Shim Tests
2+
3+
Unit tests for the CUDA AOTI (Ahead-Of-Time Inductor) shim functions used by the ExecuTorch CUDA backend.
4+
5+
## Prerequisites
6+
7+
1. **CUDA Toolkit**: Ensure CUDA is installed and available
8+
2. **ExecuTorch with CUDA**: Build and install ExecuTorch with CUDA support first
9+
10+
## Building ExecuTorch with CUDA
11+
12+
From the ExecuTorch root directory:
13+
14+
```bash
15+
# Release build
16+
cmake --workflow --preset llm-release-cuda
17+
18+
# Or debug build (recommended for debugging test failures)
19+
cmake --workflow --preset llm-debug-cuda
20+
```
21+
22+
## Building the Tests
23+
24+
### Option 1: Using CMake Presets (Recommended)
25+
26+
From this directory (`backends/cuda/runtime/shims/tests/`):
27+
28+
```bash
29+
# Release build
30+
cmake --workflow --preset default
31+
32+
# Debug build
33+
cmake --workflow --preset debug
34+
```
35+
36+
### Option 2: Manual CMake Commands
37+
38+
From the ExecuTorch root directory:
39+
40+
```bash
41+
# Configure
42+
cmake -B cmake-out/backends/cuda/runtime/shims/tests \
43+
-S backends/cuda/runtime/shims/tests \
44+
-DCMAKE_PREFIX_PATH=$(pwd)/cmake-out \
45+
-DCMAKE_BUILD_TYPE=Debug
46+
47+
# Build
48+
cmake --build cmake-out/backends/cuda/runtime/shims/tests -j$(nproc)
49+
```
50+
51+
## Running the Tests
52+
53+
### Run All Tests
54+
55+
```bash
56+
# Using ctest (from the build directory)
57+
cd cmake-out/backends/cuda/runtime/shims/tests
58+
ctest --output-on-failure
59+
60+
# Or using the test preset (from this directory)
61+
ctest --preset default
62+
```
63+
64+
### Run a Specific Test
65+
66+
```bash
67+
# From the build directory
68+
./test_aoti_torch_create_tensor_from_blob_v2
69+
./test_aoti_torch_empty_strided
70+
./test_aoti_torch_delete_tensor_object
71+
./test_aoti_torch__reinterpret_tensor
./test_aoti_torch_copy_
72+
./test_aoti_torch_new_tensor_handle
73+
./test_aoti_torch_item_bool
74+
./test_aoti_torch_assign_tensors_out
75+
```
76+
77+
### Run Specific Test Cases
78+
79+
Use Google Test filters to run specific test cases:
80+
81+
```bash
82+
# Run only device mismatch tests
83+
./test_aoti_torch_create_tensor_from_blob_v2 --gtest_filter="*DeviceMismatch*"
84+
85+
# Run a single test
86+
./test_aoti_torch_create_tensor_from_blob_v2 --gtest_filter="AOTITorchCreateTensorFromBlobV2Test.BasicFunctionalityCUDA"
87+
88+
# List all available tests
89+
./test_aoti_torch_create_tensor_from_blob_v2 --gtest_list_tests
90+
```
91+
92+
## Test Descriptions
93+
94+
| Test File | Description |
95+
|-----------|-------------|
96+
| `test_aoti_torch_create_tensor_from_blob_v2` | Tests tensor creation from existing memory blobs, including device type validation |
97+
| `test_aoti_torch_empty_strided` | Tests creation of uninitialized tensors with specified strides |
98+
| `test_aoti_torch_delete_tensor_object` | Tests proper tensor deletion and memory management |
99+
| `test_aoti_torch__reinterpret_tensor` | Tests tensor view reinterpretation with different shapes/strides |
100+
| `test_aoti_torch_copy_` | Tests tensor copy operations between CPU and CUDA |
101+
| `test_aoti_torch_new_tensor_handle` | Tests creating new tensor handles that share memory |
102+
| `test_aoti_torch_item_bool` | Tests extracting boolean values from scalar tensors |
103+
| `test_aoti_torch_assign_tensors_out` | Tests creating tensor views that share underlying data |
104+
105+
## Troubleshooting
106+
107+
### CUDA Not Available
108+
109+
If tests are skipped with "CUDA not available", ensure:
110+
- CUDA drivers are installed
111+
- A CUDA-capable GPU is present
112+
- `nvidia-smi` shows the GPU
113+
114+
### Link Errors
115+
116+
If you get link errors, ensure ExecuTorch was built with CUDA support:
117+
```bash
118+
cmake --workflow --preset llm-release-cuda
119+
```
120+
121+
### Test Failures
122+
123+
For debugging test failures, build with debug mode:
124+
```bash
125+
cmake --workflow --preset debug
126+
```
127+
128+
Then run with verbose output:
129+
```bash
130+
./test_aoti_torch_create_tensor_from_blob_v2 --gtest_break_on_failure
131+
```
132+

0 commit comments

Comments (0)