Skip to content

Commit ec3b3f4

Browse files
committed
Add device type validation in aoti_torch_create_tensor_from_blob_v2
Validate that the data pointer location matches the requested device_type. This prevents silent data corruption when tensors are created with a mismatched data pointer and device type.

Changes:
- Add cudaPointerGetAttributes check to verify data pointer location
- Error out if CPU data is provided but CUDA device is requested
- Error out if CUDA data is provided but CPU device is requested
- Add unit tests for device type mismatch scenarios

ghstack-source-id: ca29f52
ghstack-comment-id: 3676248526
Pull-Request: #16344
1 parent 89c6198 commit ec3b3f4

File tree

6 files changed

+399
-2
lines changed

6 files changed

+399
-2
lines changed

.github/workflows/cuda.yml

Lines changed: 26 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -87,6 +87,32 @@ jobs:
8787
export LD_LIBRARY_PATH=/opt/conda/lib:$LD_LIBRARY_PATH
8888
PYTHON_EXECUTABLE=python source .ci/scripts/test_model.sh "${{ matrix.model }}" cmake cuda
8989
90+
test-cuda-shims:
91+
name: test-cuda-shims
92+
uses: pytorch/test-infra/.github/workflows/linux_job_v2.yml@main
93+
permissions:
94+
id-token: write
95+
contents: read
96+
with:
97+
timeout: 90
98+
runner: linux.g5.4xlarge.nvidia.gpu
99+
gpu-arch-type: cuda
100+
gpu-arch-version: 12.6
101+
use-custom-docker-registry: false
102+
submodules: recursive
103+
ref: ${{ github.event_name == 'pull_request' && github.event.pull_request.head.sha || github.sha }}
104+
script: |
105+
set -eux
106+
107+
# Build ExecuTorch with CUDA support
108+
./install_executorch.sh
109+
110+
# Build and run CUDA shim tests
111+
pushd backends/cuda/runtime/shims/tests
112+
cmake --workflow --preset default
113+
ctest --preset default --output-on-failure
114+
popd
115+
90116
export-model-cuda-artifact:
91117
name: export-model-cuda-artifact
92118
# Skip this job if the pull request is from a fork (HuggingFace secrets are not available)

backends/cuda/runtime/shims/memory.cpp

Lines changed: 28 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -119,8 +119,6 @@ AOTITorchError aoti_torch_create_tensor_from_blob_v2(
119119
int32_t layout,
120120
const uint8_t* opaque_metadata,
121121
int64_t opaque_metadata_size) {
122-
// TODO(gasoonjia): verify given data is on the target device
123-
(void)device_type;
124122
(void)opaque_metadata;
125123
(void)layout;
126124
(void)opaque_metadata_size;
@@ -154,6 +152,34 @@ AOTITorchError aoti_torch_create_tensor_from_blob_v2(
154152
// Storage offset must be 0 since from_blob cannot handle different offsets
155153
ET_CHECK_OK_OR_RETURN_ERROR(validate_storage_offset(storage_offset));
156154

155+
// Verify that data pointer location matches the requested device_type
156+
cudaPointerAttributes data_attributes{};
157+
ET_CUDA_CHECK_OR_RETURN_ERROR(
158+
cudaPointerGetAttributes(&data_attributes, data));
159+
160+
bool data_is_on_device = data_attributes.type == cudaMemoryTypeDevice;
161+
bool data_is_on_host = data_attributes.type == cudaMemoryTypeHost ||
162+
data_attributes.type == cudaMemoryTypeUnregistered;
163+
bool requested_device =
164+
device_type == static_cast<int32_t>(SupportedDevices::CUDA);
165+
bool requested_cpu =
166+
device_type == static_cast<int32_t>(SupportedDevices::CPU);
167+
168+
// Error if data location doesn't match requested device type
169+
ET_CHECK_OR_RETURN_ERROR(
170+
!(data_is_on_device && requested_cpu),
171+
InvalidArgument,
172+
"aoti_torch_create_tensor_from_blob_v2 failed: data pointer %p is on CUDA "
173+
"but device_type is CPU. Data must be on CPU for CPU tensors.",
174+
data);
175+
176+
ET_CHECK_OR_RETURN_ERROR(
177+
!(data_is_on_host && requested_device),
178+
InvalidArgument,
179+
"aoti_torch_create_tensor_from_blob_v2 failed: data pointer %p is on CPU "
180+
"but device_type is CUDA. Data must be on GPU for CUDA tensors.",
181+
data);
182+
157183
// Convert sizes to the format expected by ExecutorTorch using SizesType
158184
std::vector<executorch::aten::SizesType> sizes =
159185
convert_sizes_to_vector(ndim, sizes_ptr);
Lines changed: 60 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,60 @@
1+
# Copyright (c) Meta Platforms, Inc. and affiliates.
2+
# All rights reserved.
3+
#
4+
# This source code is licensed under the BSD-style license found in the
5+
# LICENSE file in the root directory of this source tree.
6+
7+
cmake_minimum_required(VERSION 3.19)
8+
project(aoti_cuda_shim_tests LANGUAGES CXX CUDA)
9+
10+
set(CMAKE_CXX_STANDARD 17)
11+
set(CMAKE_CXX_STANDARD_REQUIRED ON)
12+
13+
# Find required packages
14+
find_package(CUDAToolkit REQUIRED)
15+
find_package(GTest REQUIRED)
16+
17+
# Get EXECUTORCH_ROOT
18+
if(NOT EXECUTORCH_ROOT)
19+
set(EXECUTORCH_ROOT ${CMAKE_CURRENT_SOURCE_DIR}/../../../../..)
20+
endif()
21+
22+
# Find installed ExecuTorch
23+
find_package(executorch CONFIG REQUIRED HINTS ${CMAKE_INSTALL_PREFIX})
24+
25+
# List of test files
26+
set(CUDA_SHIM_TESTS
27+
test_aoti_torch_create_tensor_from_blob_v2
28+
test_aoti_torch_empty_strided
29+
test_aoti_torch_delete_tensor_object
30+
test_aoti_torch__reinterpret_tensor
31+
test_aoti_torch_copy_
32+
test_aoti_torch_new_tensor_handle
33+
)
34+
35+
enable_testing()
36+
37+
foreach(test_name ${CUDA_SHIM_TESTS})
38+
add_executable(${test_name} ${test_name}.cpp)
39+
40+
target_include_directories(
41+
${test_name}
42+
PRIVATE ${EXECUTORCH_ROOT}
43+
${CUDAToolkit_INCLUDE_DIRS}
44+
)
45+
46+
target_link_libraries(
47+
${test_name}
48+
PRIVATE GTest::gtest
49+
GTest::gtest_main
50+
aoti_cuda_shims
51+
aoti_cuda_backend
52+
cuda_tensor_maker
53+
cuda_platform
54+
executorch_core
55+
extension_tensor
56+
CUDA::cudart
57+
)
58+
59+
add_test(NAME ${test_name} COMMAND ${test_name})
60+
endforeach()
Lines changed: 88 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,88 @@
1+
{
2+
"version": 6,
3+
"configurePresets": [
4+
{
5+
"name": "default",
6+
"displayName": "CUDA Shim Tests",
7+
"binaryDir": "${sourceDir}/../../../../../cmake-out/backends/cuda/runtime/shims/tests",
8+
"cacheVariables": {
9+
"CMAKE_BUILD_TYPE": "Release",
10+
"CMAKE_PREFIX_PATH": "${sourceDir}/../../../../../cmake-out"
11+
},
12+
"condition": {
13+
"type": "inList",
14+
"string": "${hostSystemName}",
15+
"list": ["Linux", "Windows"]
16+
}
17+
},
18+
{
19+
"name": "debug",
20+
"displayName": "CUDA Shim Tests (Debug)",
21+
"inherits": ["default"],
22+
"cacheVariables": {
23+
"CMAKE_BUILD_TYPE": "Debug"
24+
}
25+
}
26+
],
27+
"buildPresets": [
28+
{
29+
"name": "default",
30+
"displayName": "Build CUDA Shim Tests",
31+
"configurePreset": "default"
32+
},
33+
{
34+
"name": "debug",
35+
"displayName": "Build CUDA Shim Tests (Debug)",
36+
"configurePreset": "debug"
37+
}
38+
],
39+
"workflowPresets": [
40+
{
41+
"name": "default",
42+
"displayName": "Configure and build CUDA Shim Tests",
43+
"steps": [
44+
{
45+
"type": "configure",
46+
"name": "default"
47+
},
48+
{
49+
"type": "build",
50+
"name": "default"
51+
}
52+
]
53+
},
54+
{
55+
"name": "debug",
56+
"displayName": "Configure and build CUDA Shim Tests (Debug)",
57+
"steps": [
58+
{
59+
"type": "configure",
60+
"name": "debug"
61+
},
62+
{
63+
"type": "build",
64+
"name": "debug"
65+
}
66+
]
67+
}
68+
],
69+
"testPresets": [
70+
{
71+
"name": "default",
72+
"displayName": "Run all CUDA Shim Tests",
73+
"configurePreset": "default",
74+
"output": {
75+
"outputOnFailure": true
76+
}
77+
},
78+
{
79+
"name": "debug",
80+
"displayName": "Run all CUDA Shim Tests (Debug)",
81+
"configurePreset": "debug",
82+
"output": {
83+
"outputOnFailure": true
84+
}
85+
}
86+
]
87+
}
88+
Lines changed: 132 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,132 @@
1+
# CUDA AOTI Shim Tests
2+
3+
Unit tests for the CUDA AOTI (Ahead-Of-Time Inductor) shim functions used by the ExecuTorch CUDA backend.
4+
5+
## Prerequisites
6+
7+
1. **CUDA Toolkit**: Ensure CUDA is installed and available
8+
2. **ExecuTorch with CUDA**: Build and install ExecuTorch with CUDA support first
9+
10+
## Building ExecuTorch with CUDA
11+
12+
From the ExecuTorch root directory:
13+
14+
```bash
15+
# Release build
16+
cmake --workflow --preset llm-release-cuda
17+
18+
# Or debug build (recommended for debugging test failures)
19+
cmake --workflow --preset llm-debug-cuda
20+
```
21+
22+
## Building the Tests
23+
24+
### Option 1: Using CMake Presets (Recommended)
25+
26+
From this directory (`backends/cuda/runtime/shims/tests/`):
27+
28+
```bash
29+
# Release build
30+
cmake --workflow --preset default
31+
32+
# Debug build
33+
cmake --workflow --preset debug
34+
```
35+
36+
### Option 2: Manual CMake Commands
37+
38+
From the ExecuTorch root directory:
39+
40+
```bash
41+
# Configure
42+
cmake -B cmake-out/backends/cuda/runtime/shims/tests \
43+
-S backends/cuda/runtime/shims/tests \
44+
-DCMAKE_PREFIX_PATH=$(pwd)/cmake-out \
45+
-DCMAKE_BUILD_TYPE=Debug
46+
47+
# Build
48+
cmake --build cmake-out/backends/cuda/runtime/shims/tests -j$(nproc)
49+
```
50+
51+
## Running the Tests
52+
53+
### Run All Tests
54+
55+
```bash
56+
# Using ctest (from the build directory)
57+
cd cmake-out/backends/cuda/runtime/shims/tests
58+
ctest --output-on-failure
59+
60+
# Or using the test preset (from this directory)
61+
ctest --preset default
62+
```
63+
64+
### Run a Specific Test
65+
66+
```bash
67+
# From the build directory
68+
./test_aoti_torch_create_tensor_from_blob_v2
69+
./test_aoti_torch_empty_strided
70+
./test_aoti_torch_delete_tensor_object
71+
./test_aoti_torch__reinterpret_tensor
./test_aoti_torch_copy_
72+
./test_aoti_torch_new_tensor_handle
73+
./test_aoti_torch_item_bool
74+
./test_aoti_torch_assign_tensors_out
75+
```
76+
77+
### Run Specific Test Cases
78+
79+
Use Google Test filters to run specific test cases:
80+
81+
```bash
82+
# Run only device mismatch tests
83+
./test_aoti_torch_create_tensor_from_blob_v2 --gtest_filter="*DeviceMismatch*"
84+
85+
# Run a single test
86+
./test_aoti_torch_create_tensor_from_blob_v2 --gtest_filter="AOTITorchCreateTensorFromBlobV2Test.BasicFunctionalityCUDA"
87+
88+
# List all available tests
89+
./test_aoti_torch_create_tensor_from_blob_v2 --gtest_list_tests
90+
```
91+
92+
## Test Descriptions
93+
94+
| Test File | Description |
95+
|-----------|-------------|
96+
| `test_aoti_torch_create_tensor_from_blob_v2` | Tests tensor creation from existing memory blobs, including device type validation |
97+
| `test_aoti_torch_empty_strided` | Tests creation of uninitialized tensors with specified strides |
98+
| `test_aoti_torch_delete_tensor_object` | Tests proper tensor deletion and memory management |
99+
| `test_aoti_torch__reinterpret_tensor` | Tests tensor view reinterpretation with different shapes/strides |
100+
| `test_aoti_torch_copy_` | Tests tensor copy operations between CPU and CUDA |
101+
| `test_aoti_torch_new_tensor_handle` | Tests creating new tensor handles that share memory |
102+
| `test_aoti_torch_item_bool` | Tests extracting boolean values from scalar tensors |
103+
| `test_aoti_torch_assign_tensors_out` | Tests creating tensor views that share underlying data |
104+
105+
## Troubleshooting
106+
107+
### CUDA Not Available
108+
109+
If tests are skipped with "CUDA not available", ensure:
110+
- CUDA drivers are installed
111+
- A CUDA-capable GPU is present
112+
- `nvidia-smi` shows the GPU
113+
114+
### Link Errors
115+
116+
If you get link errors, ensure ExecuTorch was built with CUDA support:
117+
```bash
118+
cmake --workflow --preset llm-release-cuda
119+
```
120+
121+
### Test Failures
122+
123+
For debugging test failures, build with debug mode:
124+
```bash
125+
cmake --workflow --preset debug
126+
```
127+
128+
Then run with verbose output:
129+
```bash
130+
./test_aoti_torch_create_tensor_from_blob_v2 --gtest_break_on_failure
131+
```
132+

0 commit comments

Comments (0)