pytorch · JacobSzwejbka · Dec 19, 2025 · Dec 20, 2025
@@ -0,0 +1,110 @@
+# Copyright (c) Meta Platforms, Inc. and affiliates.
+# All rights reserved.
+#
+# This source code is licensed under the BSD-style license found in the
+# LICENSE file in the root directory of this source tree.
+
+# Standalone CMake project for the parakeet_runner example executable.
+cmake_minimum_required(VERSION 3.24)
+project(parakeet_runner)
+
+# Build as C++17; REQUIRED makes configuration fail instead of silently
+# falling back to an older standard.
+set(CMAKE_CXX_STANDARD_REQUIRED ON)
+set(CMAKE_CXX_STANDARD 17)
+
+# Directory three levels above this file; used below as the base for the
+# shared CMake utilities and the common include path.
+set(EXECUTORCH_ROOT "${CMAKE_CURRENT_SOURCE_DIR}/../../..")
+
+# Shared CMake helpers (presumably the source of the
+# executorch_target_link_options_shared_lib / target_link_options_gc_sections
+# commands used later in this file -- confirm in Utils.cmake).
+include("${EXECUTORCH_ROOT}/tools/cmake/Utils.cmake")
+
+# Detect an iOS toolchain by the toolchain file name: anything ending in
+# "iOS.cmake" or "ios.toolchain.cmake" sets CMAKE_TOOLCHAIN_IOS to ON,
+# everything else (including no toolchain file) sets it to OFF.
+#
+# The backslashes are doubled on purpose: inside a quoted argument CMake
+# evaluates "\." to a plain ".", which the regex engine would then treat as
+# "any character". "\\." reaches the regex engine as "\." and matches only a
+# literal dot.
+if(CMAKE_TOOLCHAIN_FILE MATCHES ".*(iOS|ios\\.toolchain)\\.cmake$")
+  set(CMAKE_TOOLCHAIN_IOS ON)
+else()
+  set(CMAKE_TOOLCHAIN_IOS OFF)
+endif()
+
+# Put the parent of EXECUTORCH_ROOT on the include path so sources can write
+# "#include <executorch/path/to/header.h>".
+set(_common_include_directories "${EXECUTORCH_ROOT}/..")
+
+# gflags: point find_package at the third-party/gflags directory located
+# three levels above this project's binary directory.
+set(gflags_DIR "${CMAKE_CURRENT_BINARY_DIR}/../../../third-party/gflags")
+find_package(gflags REQUIRED)
+
+# ExecuTorch libraries: add the directory three levels above this project's
+# binary directory to the find roots, then load the executorch package config.
+# FIND_ROOT_PATH_BOTH searches both the re-rooted and the plain locations.
+list(APPEND CMAKE_FIND_ROOT_PATH "${CMAKE_CURRENT_BINARY_DIR}/../../..")
+find_package(executorch CONFIG REQUIRED FIND_ROOT_PATH_BOTH)
+executorch_target_link_options_shared_lib(executorch)
+
+# Libraries linked into parakeet_runner; later sections append to this list.
+set(link_libraries executorch gflags)
+
+# Common ops for all builds
+list(APPEND link_libraries optimized_native_cpu_ops_lib cpublas eigen_blas)
+executorch_target_link_options_shared_lib(optimized_native_cpu_ops_lib)
+
+# CPU-only builds need quantized and custom ops. The check is on the CUDA
+# switch alone (not on the compiler), so non-MSVC CPU builds get these
+# libraries as well. Each library is linked only when the ExecuTorch package
+# actually exported a target for it, so a build of ExecuTorch without one of
+# them still configures.
+# NOTE(review): the previous condition additionally required MSVC, which
+# contradicted the comment above -- confirm that was not intentional.
+if(NOT EXECUTORCH_BUILD_CUDA)
+  foreach(_cpu_ops_lib IN ITEMS quantized_ops_lib custom_ops)
+    if(TARGET ${_cpu_ops_lib})
+      list(APPEND link_libraries ${_cpu_ops_lib})
+      executorch_target_link_options_shared_lib(${_cpu_ops_lib})
+    else()
+      message(STATUS "parakeet_runner: ${_cpu_ops_lib} target not found; "
+                     "not linking it"
+      )
+    endif()
+  endforeach()
+endif()
+
+# XNNPACK backend: linked only when the xnnpack_backend target is available.
+if(TARGET xnnpack_backend)
+  list(APPEND link_libraries xnnpack_backend XNNPACK xnnpack-microkernels-prod)
+  # kleidiai is optional; link it after the XNNPACK libraries when it exists.
+  if(TARGET kleidiai)
+    list(APPEND link_libraries kleidiai)
+  endif()
+  executorch_target_link_options_shared_lib(xnnpack_backend)
+endif()
+
+# On Android, cpuinfo uses the platform log library, so link it explicitly.
+if(ANDROID)
+  list(APPEND link_libraries log)
+endif()
+
+# ExecuTorch extension libraries and the tokenizer library the runner links.
+list(APPEND link_libraries
+  extension_llm_runner
+  extension_module
+  extension_data_loader
+  extension_tensor
+  extension_flat_tensor
+  tokenizers::tokenizers
+)
+
+# CUDA backend, enabled with -DEXECUTORCH_BUILD_CUDA=ON. Requires the CUDA
+# toolkit. The shared-lib link-option helper is not applied under MSVC.
+if(EXECUTORCH_BUILD_CUDA)
+  find_package(CUDAToolkit REQUIRED)
+  list(APPEND link_libraries aoti_cuda_backend)
+  if(NOT MSVC)
+    executorch_target_link_options_shared_lib(aoti_cuda_backend)
+  endif()
+endif()
+
+# Metal backend, enabled with -DEXECUTORCH_BUILD_METAL=ON.
+if(EXECUTORCH_BUILD_METAL)
+  list(APPEND link_libraries metal_backend)
+  executorch_target_link_options_shared_lib(metal_backend)
+endif()
+
+# The runner executable, built from the single source file in this directory.
+add_executable(parakeet_runner main.cpp)
+
+# For anything other than a Debug build, apply the gc-sections link helper
+# and, on toolchains other than Apple and MSVC, pass -s to the linker to strip
+# symbols. CMAKE_BUILD_TYPE is empty under multi-config generators (Visual
+# Studio, Xcode, Ninja Multi-Config), so there this branch is taken for every
+# configuration, Debug included.
+if(NOT CMAKE_BUILD_TYPE STREQUAL "Debug")
+  target_link_options_gc_sections(parakeet_runner)
+  if(NOT APPLE AND NOT MSVC)
+    target_link_options(parakeet_runner PRIVATE "LINKER:-s")
+  endif()
+endif()
+
+# An executable has no consumers, so its usage requirements are PRIVATE.
+target_include_directories(
+  parakeet_runner PRIVATE ${_common_include_directories}
+)
+target_link_libraries(parakeet_runner PRIVATE ${link_libraries})
+# NOTE(review): _common_compile_options is not set anywhere in this file; it
+# expands to nothing unless Utils.cmake or the cache defines it -- confirm.
+target_compile_options(parakeet_runner PRIVATE ${_common_compile_options})
+
+# On Windows, copy required DLLs to the executable directory. After each
+# build of parakeet_runner with MSVC and CUDA enabled, the aoti_cuda_shims
+# binary is copied next to the executable (only when it differs from the copy
+# already there). VERBATIM keeps argument escaping consistent across
+# generators and shells, e.g. for paths containing spaces.
+if(MSVC AND EXECUTORCH_BUILD_CUDA)
+  add_custom_command(
+    TARGET parakeet_runner
+    POST_BUILD
+    COMMAND ${CMAKE_COMMAND} -E copy_if_different $<TARGET_FILE:aoti_cuda_shims>
+            $<TARGET_FILE_DIR:parakeet_runner>
+    COMMENT "Copying aoti_cuda_shims.dll to parakeet_runner directory"
+    VERBATIM
+  )
+endif()
@@ -0,0 +1,110 @@
+{
+    "version": 6,
+    "configurePresets": [
+        {
+            "name": "parakeet-base",
+            "hidden": true,
+            "binaryDir": "${sourceDir}/../../../cmake-out/examples/models/parakeet",
+            "cacheVariables": {
+                "CMAKE_BUILD_TYPE": "Release",
+                "CMAKE_FIND_ROOT_PATH": "${sourceDir}/../../../cmake-out",
+                "CMAKE_PREFIX_PATH": "${sourceDir}/../../../cmake-out"
+            }
+        },
+        {
+            "name": "parakeet-cpu",
+            "displayName": "Parakeet runner (CPU)",
+            "inherits": ["parakeet-base"]
+        },
+        {
+            "name": "parakeet-cuda",
+            "displayName": "Parakeet runner (CUDA)",
+            "inherits": ["parakeet-base"],
+            "cacheVariables": {
+                "EXECUTORCH_BUILD_CUDA": "ON"
+            },
+            "condition": {
+                "type": "inList",
+                "string": "${hostSystemName}",
+                "list": ["Linux", "Windows"]
+            }
+        },
+        {
+            "name": "parakeet-metal",
+            "displayName": "Parakeet runner (Metal)",
+            "inherits": ["parakeet-base"],
+            "cacheVariables": {
+                "EXECUTORCH_BUILD_METAL": "ON"
+            },
+            "condition": {
+                "lhs": "${hostSystemName}",
+                "type": "equals",
+                "rhs": "Darwin"
+            }
+        }
+    ],
+    "buildPresets": [
+        {
+            "name": "parakeet-cpu",
+            "displayName": "Build Parakeet runner (CPU)",
+            "configurePreset": "parakeet-cpu",
+            "targets": ["parakeet_runner"]
+        },
+        {
+            "name": "parakeet-cuda",
+            "displayName": "Build Parakeet runner (CUDA)",
+            "configurePreset": "parakeet-cuda",
+            "targets": ["parakeet_runner"]
+        },
+        {
+            "name": "parakeet-metal",
+            "displayName": "Build Parakeet runner (Metal)",
+            "configurePreset": "parakeet-metal",
+            "targets": ["parakeet_runner"]
+        }
+    ],
+    "workflowPresets": [
+        {
+            "name": "parakeet-cpu",
+            "displayName": "Configure and build Parakeet runner (CPU)",
+            "steps": [
+                {
+                    "type": "configure",
+                    "name": "parakeet-cpu"
+                },
+                {
+                    "type": "build",
+                    "name": "parakeet-cpu"
+                }
+            ]
+        },
+        {
+            "name": "parakeet-cuda",
+            "displayName": "Configure and build Parakeet runner (CUDA)",
+            "steps": [
+                {
+                    "type": "configure",
+                    "name": "parakeet-cuda"
+                },
+                {
+                    "type": "build",
+                    "name": "parakeet-cuda"
+                }
+            ]
+        },
+        {
+            "name": "parakeet-metal",
+            "displayName": "Configure and build Parakeet runner (Metal)",
+            "steps": [
+                {
+                    "type": "configure",
+                    "name": "parakeet-metal"
+                },
+                {
+                    "type": "build",
+                    "name": "parakeet-metal"
+                }
+            ]
+        }
+    ]
+}
@@ -0,0 +1,84 @@
+# Parakeet TDT Export for ExecuTorch
+
+Export the [nvidia/parakeet-tdt-0.6b-v3](https://huggingface.co/nvidia/parakeet-tdt-0.6b-v3) speech recognition model to ExecuTorch.
+
+## Installation
+
+```bash
+pip install "nemo_toolkit[asr]" torchaudio
+```
+
+## Export
+
+Export the model (portable backend):
+```bash
+python export_parakeet_tdt.py
+```
+
+Export with a specific backend:
+```bash
+python export_parakeet_tdt.py --backend xnnpack      # CPU acceleration
+python export_parakeet_tdt.py --backend cuda         # CUDA acceleration
+python export_parakeet_tdt.py --backend cuda-windows # CUDA on Windows
+```
+
+Test transcription on an audio file:
+```bash
+python export_parakeet_tdt.py --audio /path/to/audio.wav
+```
+
+### Export Arguments
+
+| Argument | Description |
+|----------|-------------|
+| `--output-dir` | Output directory for exports (default: `./parakeet_tdt_exports`) |
+| `--backend` | Backend for acceleration: `portable`, `xnnpack`, `cuda`, `cuda-windows` (default: `portable`) |
+| `--audio` | Path to audio file for transcription test |
+
+## C++ Runner
+
+### Building
+
+First, build ExecuTorch with the LLM preset:
+
+```bash
+cd executorch
+cmake --workflow --preset llm-release
+```
+
+Then build the parakeet runner:
+
+```bash
+cd examples/models/parakeet
+cmake --workflow --preset parakeet-cpu
+```
+
+For Metal (macOS):
+```bash
+cd examples/models/parakeet
+cmake --workflow --preset parakeet-metal
+```
+
+For CUDA (Linux/Windows):
+```bash
+cd examples/models/parakeet
+cmake --workflow --preset parakeet-cuda
+```
+
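+### Running
+
+The presets place the binary under `cmake-out` in the ExecuTorch root directory, so run the following from there (not from `examples/models/parakeet`):
+
+```bash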
+./cmake-out/examples/models/parakeet/parakeet_runner \
+  --model_path parakeet.pte \
+  --processor_path preprocessor.pte \
+  --audio_path audio.wav
+```
+
+### Runner Arguments
+
+| Argument | Description |
+|----------|-------------|
+| `--model_path` | Path to Parakeet model (.pte) |
+| `--processor_path` | Path to preprocessor .pte for mel spectrogram extraction |
+| `--audio_path` | Path to input audio file (.wav) |
+| `--tokenizer_path` | Path to tokenizer file (for token-to-text conversion) |