diff --git a/.gitignore b/.gitignore
index b773865..20ecca3 100644
--- a/.gitignore
+++ b/.gitignore
@@ -9,3 +9,7 @@ mkl_fft/_pydfti.c
 mkl_fft/_pydfti.cpython*.so
 mkl_fft/_pydfti.*-win_amd64.pyd
 mkl_fft/src/mklfft.c
+
+# ASV benchmark artifacts
+.asv/
+benchmarks/.asv/
diff --git a/benchmarks/README.md b/benchmarks/README.md
new file mode 100644
index 0000000..8689ee1
--- /dev/null
+++ b/benchmarks/README.md
@@ -0,0 +1,29 @@
+# mkl_fft ASV Benchmarks
+
+Performance benchmarks for [mkl_fft](https://github.com/IntelPython/mkl_fft) using
+[Airspeed Velocity (ASV)](https://asv.readthedocs.io/en/stable/).
+
+## Coverage
+
+| File | API | Transforms | Dtypes | Sizes/Shapes |
+|------|-----|-----------|--------|-------------|
+| `bench_fft1d.py` | `mkl_fft` | `fft`, `ifft`, `rfft`, `irfft` | float32, float64, complex64, complex128 | power-of-two and non-power-of-two |
+| `bench_fftnd.py` | `mkl_fft` | `fft2`, `ifft2`, `rfft2`, `irfft2`, `fftn`, `ifftn`, `rfftn`, `irfftn` | float32, float64, complex64, complex128 | square and non-square/non-cubic |
+| `bench_numpy_fft.py` | `mkl_fft.interfaces.numpy_fft` | All exported functions including Hermitian (`hfft`, `ihfft`) | float32, float64, complex64, complex128 | power-of-two |
+| `bench_scipy_fft.py` | `mkl_fft.interfaces.scipy_fft` | All exported functions including Hermitian 2-D/N-D (`hfft2`, `hfftn`) | float32, float64, complex64, complex128 | square and cubic |
+| `bench_memory.py` | `mkl_fft` | Peak RSS for 1-D, 2-D, and 3-D transforms | float32, float64, complex128 | power-of-two |
+
+## Threading
+
+Set `MKL_NUM_THREADS` in the environment before running ASV to control the
+thread count used by MKL:
+
+```bash
+MKL_NUM_THREADS=8 asv run --python=same --quick HEAD^!
+```
+
+If `MKL_NUM_THREADS` is not set, `benchmarks/__init__.py` applies a default:
+**4** threads when the machine has 4 or more physical cores, or **1**
+(single-threaded) otherwise. This keeps results comparable across CI machines in
+the shared pool regardless of their total core count. Physical cores are read from
+`/proc/cpuinfo` (1 if unreadable) — hyperthreads are excluded per MKL recommendation.
diff --git a/benchmarks/asv.conf.json b/benchmarks/asv.conf.json
new file mode 100644
index 0000000..aa66103
--- /dev/null
+++ b/benchmarks/asv.conf.json
@@ -0,0 +1,19 @@
+{
+    "version": 1,
+    "project": "mkl_fft",
+    "project_url": "https://github.com/IntelPython/mkl_fft",
+    "show_commit_url": "https://github.com/IntelPython/mkl_fft/commit/",
+    "repo": "..",
+    "branches": [
+        "master"
+    ],
+    "benchmark_dir": "benchmarks",
+    "env_dir": ".asv/env",
+    "results_dir": ".asv/results",
+    "html_dir": ".asv/html",
+    "build_cache_size": 2,
+    "default_benchmark_timeout": 500,
+    "regressions_thresholds": {
+        ".*": 0.3
+    }
+}
diff --git a/benchmarks/benchmarks/__init__.py b/benchmarks/benchmarks/__init__.py
new file mode 100644
index 0000000..8c294d8
--- /dev/null
+++ b/benchmarks/benchmarks/__init__.py
@@ -0,0 +1,48 @@
+"""ASV benchmarks for mkl_fft.
+
+Thread control — design rationale
+----------------------------------
+Since we do not have a dedicated CI benchmark machine, benchmarks run on a shared CI pool
+whose machines vary in core count over time.
+Using the full physical core count of each machine would make results
+incomparable across runs on different machines.
+
+Strategy:
+  - Physical cores >= 4  →  fix MKL_NUM_THREADS = 4
+      4 is the lowest common denominator that guarantees multi-threaded MKL
+      behavior and is achievable on any modern CI machine.  Results from
+      different machines in the pool are therefore directly comparable.
+  - Physical cores < 4   →  fall back to MKL_NUM_THREADS = 1 (single-threaded)
+      Prevents over-subscription on under-resourced machines and avoids
+      misleading comparisons against 4-thread baselines.
+
+MKL recommendation: use physical cores, not logical (hyperthreaded) CPUs.
+"""
+
+import os
+import re
+
+_MIN_THREADS = 4  # thread count used when >= this many physical cores exist
+
+
+def _physical_cores():
+    """Physical cores = cores/socket x sockets (/proc/cpuinfo); 1 on failure."""
+    try:
+        with open("/proc/cpuinfo") as cpuinfo:
+            text = cpuinfo.read()
+        per_socket = re.search(r"cpu cores\s*:\s*(\d+)", text).group(1)
+        socket_ids = set(re.findall(r"physical id\s*:\s*(\d+)", text))
+        # No "physical id" lines at all still counts as one socket.
+        n_sockets = max(len(socket_ids), 1)
+        return int(per_socket) * n_sockets
+    except Exception:
+        return 1
+
+
+def _thread_count():
+    """Return the default MKL thread count as a string: _MIN_THREADS or "1"."""
+    return str(_MIN_THREADS) if _physical_cores() >= _MIN_THREADS else "1"
+
+
+_THREADS = os.environ.get("MKL_NUM_THREADS") or _thread_count()  # empty = unset
+os.environ["MKL_NUM_THREADS"] = _THREADS
diff --git a/benchmarks/benchmarks/_utils.py b/benchmarks/benchmarks/_utils.py
new file mode 100644
index 0000000..f21ff82
--- /dev/null
+++ b/benchmarks/benchmarks/_utils.py
@@ -0,0 +1,65 @@
+"""Shared utilities for mkl_fft benchmarks."""
+
+import numpy as np
+
+_RNG_SEED = 42  # fixed seed: every setup() call regenerates identical data
+
+
+def _make_input(rng, shape, dtype):
+    """Draw a standard-normal array of *shape* and cast it to *dtype*.
+
+    *shape*: int (1-D) or tuple.  Complex dtypes get a random imaginary part.
+    """
+    target = np.dtype(dtype)
+    dims = shape if not isinstance(shape, int) else (shape,)
+    data = rng.standard_normal(dims)
+    if target.kind == "c":
+        data = data + 1j * rng.standard_normal(dims)
+    return data.astype(target)
+
+
+class BenchC2C:
+    """Base class for benchmarks that transform one input array, ``self.x``.
+
+    Subclasses supply params, param_names, and time_* / peakmem_* methods.
+    """
+
+    def setup(self, shape, dtype):
+        # Fresh fixed-seed generator per call, so the data is reproducible.
+        self.x = _make_input(np.random.default_rng(_RNG_SEED), shape, dtype)
+
+
+# dtype axes (dtype names given as strings)
+_DTYPES_ALL = ["float32", "float64", "complex64", "complex128"]
+_DTYPES_REAL = ["float32", "float64"]  # R2C / C2R / Hermitian suites
+_DTYPES_REDUCED = ["float64", "complex128"]  # double precision only
+
+# shape/size axes shared across multiple files (all power-of-two)
+_SHAPES_2D = [(64, 64), (128, 128), (256, 256), (512, 512)]
+_SHAPES_2D_IFACE = [(64, 64), (256, 256), (512, 512)]  # interface suites
+_SHAPES_3D = [(16, 16, 16), (32, 32, 32), (64, 64, 64)]
+
+
+class BenchR2C:
+    """Base setup shared by the R2C / C2R and Hermitian benchmark classes.
+
+    setup() stores two arrays:
+      self.x_real    — real array of the full *shape* (rfft / ihfft input)
+      self.x_complex — complex array whose last axis has n // 2 + 1 entries
+                       (irfft / hfft input)
+
+    *shape* is an int for 1-D suites or a tuple for multi-D suites.
+    """
+
+    def setup(self, shape, dtype):
+        rng = np.random.default_rng(_RNG_SEED)
+        # Only the last axis is halved (to n // 2 + 1) in the spectrum shape.
+        if isinstance(shape, int):
+            spec_shape = shape // 2 + 1
+        else:
+            spec_shape = shape[:-1] + (shape[-1] // 2 + 1,)
+        self.x_real = rng.standard_normal(shape).astype(dtype)
+        re_part = rng.standard_normal(spec_shape)
+        spectrum = re_part + 1j * rng.standard_normal(spec_shape)
+        cdtype = "complex64" if dtype == "float32" else "complex128"
+        self.x_complex = spectrum.astype(cdtype)
diff --git a/benchmarks/benchmarks/bench_fft1d.py b/benchmarks/benchmarks/bench_fft1d.py
new file mode 100644
index 0000000..851b905
--- /dev/null
+++ b/benchmarks/benchmarks/bench_fft1d.py
@@ -0,0 +1,84 @@
+"""Benchmarks for 1-D FFT operations using the mkl_fft root API."""
+
+import mkl_fft
+
+from ._utils import _DTYPES_ALL, _DTYPES_REAL, BenchC2C, BenchR2C
+
+_SIZES_POW2 = [64, 256, 1024, 4096, 16384, 65536]  # 2**6 .. 2**16, even powers
+_SIZES_NONPOW2 = [127, 509, 1000, 4001, 10007]  # four primes + 1000 = 2**3 * 5**3
+
+
+# ---------------------------------------------------------------------------
+# Complex-to-complex 1-D (power-of-two sizes)
+# ---------------------------------------------------------------------------
+
+
+class BenchFFT1D(BenchC2C):
+    """Time 1-D fft / ifft on power-of-two lengths for all four dtypes."""
+
+    params = [_SIZES_POW2, _DTYPES_ALL]
+    param_names = ["n", "dtype"]
+
+    def time_fft(self, n, dtype):
+        mkl_fft.fft(self.x)
+
+    def time_ifft(self, n, dtype):
+        mkl_fft.ifft(self.x)
+
+
+# ---------------------------------------------------------------------------
+# Real-to-complex / complex-to-real 1-D (power-of-two sizes)
+# ---------------------------------------------------------------------------
+
+
+class BenchRFFT1D(BenchR2C):
+    """Time 1-D rfft and irfft (n passed explicitly) — power-of-two sizes."""
+
+    params = [_SIZES_POW2, _DTYPES_REAL]
+    param_names = ["n", "dtype"]
+
+    def time_rfft(self, n, dtype):
+        mkl_fft.rfft(self.x_real)
+
+    def time_irfft(self, n, dtype):
+        mkl_fft.irfft(self.x_complex, n=n)
+
+
+# ---------------------------------------------------------------------------
+# Complex-to-complex 1-D (non-power-of-two sizes)
+# ---------------------------------------------------------------------------
+
+
+class BenchFFT1DNonPow2(BenchC2C):
+    """Time 1-D fft / ifft on non-power-of-two lengths (no float32 input).
+
+    MKL uses a different code path for non-power-of-two transforms;
+    this suite catches regressions in that path.
+    """
+
+    params = [_SIZES_NONPOW2, ["float64", "complex128", "complex64"]]
+    param_names = ["n", "dtype"]
+
+    def time_fft(self, n, dtype):
+        mkl_fft.fft(self.x)
+
+    def time_ifft(self, n, dtype):
+        mkl_fft.ifft(self.x)
+
+
+# ---------------------------------------------------------------------------
+# Real-to-complex / complex-to-real 1-D (non-power-of-two sizes)
+# ---------------------------------------------------------------------------
+
+
+class BenchRFFT1DNonPow2(BenchR2C):
+    """Time 1-D rfft and irfft (n passed explicitly) — non-power-of-two n."""
+
+    params = [_SIZES_NONPOW2, _DTYPES_REAL]
+    param_names = ["n", "dtype"]
+
+    def time_rfft(self, n, dtype):
+        mkl_fft.rfft(self.x_real)
+
+    def time_irfft(self, n, dtype):
+        mkl_fft.irfft(self.x_complex, n=n)
diff --git a/benchmarks/benchmarks/bench_fftnd.py b/benchmarks/benchmarks/bench_fftnd.py
new file mode 100644
index 0000000..2a69477
--- /dev/null
+++ b/benchmarks/benchmarks/bench_fftnd.py
@@ -0,0 +1,143 @@
+"""Benchmarks for 2-D and N-D FFT operations using the mkl_fft root API."""
+
+import mkl_fft
+
+from ._utils import (
+    _DTYPES_ALL,
+    _DTYPES_REAL,
+    _DTYPES_REDUCED,
+    _SHAPES_2D,
+    _SHAPES_3D,
+    BenchC2C,
+    BenchR2C,
+)
+
+# ---------------------------------------------------------------------------
+# 2-D complex-to-complex (power-of-two, square + non-square)
+# ---------------------------------------------------------------------------
+
+
+class BenchFFT2D(BenchC2C):
+    """Time fft2 / ifft2 on power-of-two square and 2:1 rectangular shapes."""
+
+    params = [
+        _SHAPES_2D + [(256, 128), (512, 256)],
+        _DTYPES_ALL,
+    ]
+    param_names = ["shape", "dtype"]
+
+    def time_fft2(self, shape, dtype):
+        mkl_fft.fft2(self.x)
+
+    def time_ifft2(self, shape, dtype):
+        mkl_fft.ifft2(self.x)
+
+
+# ---------------------------------------------------------------------------
+# 2-D real-to-complex / complex-to-real
+# ---------------------------------------------------------------------------
+
+
+class BenchRFFT2D(BenchR2C):
+    """Time rfft2 on real input and irfft2 (s=shape) on half-spectrum input."""
+
+    params = [_SHAPES_2D, _DTYPES_REAL]
+    param_names = ["shape", "dtype"]
+
+    def time_rfft2(self, shape, dtype):
+        mkl_fft.rfft2(self.x_real)
+
+    def time_irfft2(self, shape, dtype):
+        mkl_fft.irfft2(self.x_complex, s=shape)
+
+
+# ---------------------------------------------------------------------------
+# 2-D complex-to-complex (non-power-of-two)
+# ---------------------------------------------------------------------------
+
+
+class BenchFFT2DNonPow2(BenchC2C):
+    """Time fft2 / ifft2 on non-power-of-two shapes (float64, complex128)."""
+
+    params = [
+        [
+            (96, 96),
+            (100, 100),
+            (270, 270),
+            (500, 500),
+            (100, 200),  # non-square non-pow2
+        ],
+        _DTYPES_REDUCED,
+    ]
+    param_names = ["shape", "dtype"]
+
+    def time_fft2(self, shape, dtype):
+        mkl_fft.fft2(self.x)
+
+    def time_ifft2(self, shape, dtype):
+        mkl_fft.ifft2(self.x)
+
+
+# ---------------------------------------------------------------------------
+# N-D complex-to-complex (3-D cubes + non-cubic shape)
+# ---------------------------------------------------------------------------
+
+
+class BenchFFTnD(BenchC2C):
+    """Time fftn / ifftn on 3-D power-of-two cubes plus one non-cubic shape."""
+
+    params = [
+        _SHAPES_3D + [(32, 64, 128)],
+        _DTYPES_ALL,
+    ]
+    param_names = ["shape", "dtype"]
+
+    def time_fftn(self, shape, dtype):
+        mkl_fft.fftn(self.x)
+
+    def time_ifftn(self, shape, dtype):
+        mkl_fft.ifftn(self.x)
+
+
+# ---------------------------------------------------------------------------
+# N-D real-to-complex / complex-to-real
+# ---------------------------------------------------------------------------
+
+
+class BenchRFFTnD(BenchR2C):
+    """Time 3-D rfftn on real input and irfftn (s=shape) on a half-spectrum."""
+
+    params = [_SHAPES_3D, _DTYPES_REAL]
+    param_names = ["shape", "dtype"]
+
+    def time_rfftn(self, shape, dtype):
+        mkl_fft.rfftn(self.x_real)
+
+    def time_irfftn(self, shape, dtype):
+        mkl_fft.irfftn(self.x_complex, s=shape)
+
+
+# ---------------------------------------------------------------------------
+# N-D complex-to-complex (non-power-of-two 3-D)
+# ---------------------------------------------------------------------------
+
+
+class BenchFFTnDNonPow2(BenchC2C):
+    """Time fftn / ifftn on non-power-of-two 3-D shapes (float64, complex128)."""
+
+    params = [
+        [
+            (24, 24, 24),
+            (30, 30, 30),
+            (50, 50, 50),
+            (30, 40, 50),  # non-cubic non-pow2
+        ],
+        _DTYPES_REDUCED,
+    ]
+    param_names = ["shape", "dtype"]
+
+    def time_fftn(self, shape, dtype):
+        mkl_fft.fftn(self.x)
+
+    def time_ifftn(self, shape, dtype):
+        mkl_fft.ifftn(self.x)
diff --git a/benchmarks/benchmarks/bench_memory.py b/benchmarks/benchmarks/bench_memory.py
new file mode 100644
index 0000000..46d7176
--- /dev/null
+++ b/benchmarks/benchmarks/bench_memory.py
@@ -0,0 +1,90 @@
+"""Peak-memory benchmarks for FFT operations.
+
+Measures peak RSS (resident set size) to detect memory regressions
+in the mkl_fft root API across 1-D, 2-D, and 3-D transforms.
+"""
+
+import mkl_fft
+
+from ._utils import (
+    _DTYPES_REAL,
+    _DTYPES_REDUCED,
+    _SHAPES_2D,
+    _SHAPES_3D,
+    BenchC2C,
+    BenchR2C,
+)
+
+_SIZES_1D = [1024, 16384, 65536, 262144]  # 2**10, 2**14, 2**16, 2**18
+
+
+# ---------------------------------------------------------------------------
+# 1-D complex FFT
+# ---------------------------------------------------------------------------
+
+
+class PeakMemFFT1D(BenchC2C):
+    """Peak RSS of 1-D fft / ifft for float64 and complex128 input."""
+
+    params = [_SIZES_1D, _DTYPES_REDUCED]
+    param_names = ["n", "dtype"]
+
+    def peakmem_fft(self, n, dtype):
+        mkl_fft.fft(self.x)
+
+    def peakmem_ifft(self, n, dtype):
+        mkl_fft.ifft(self.x)
+
+
+# ---------------------------------------------------------------------------
+# 1-D real FFT
+# ---------------------------------------------------------------------------
+
+
+class PeakMemRFFT1D(BenchR2C):
+    """Peak RSS of 1-D rfft (real input) and irfft (half-spectrum input)."""
+
+    params = [_SIZES_1D, _DTYPES_REAL]
+    param_names = ["n", "dtype"]
+
+    def peakmem_rfft(self, n, dtype):
+        mkl_fft.rfft(self.x_real)
+
+    def peakmem_irfft(self, n, dtype):
+        mkl_fft.irfft(self.x_complex, n=n)
+
+
+# ---------------------------------------------------------------------------
+# 2-D complex FFT
+# ---------------------------------------------------------------------------
+
+
+class PeakMemFFT2D(BenchC2C):
+    """Peak RSS of fft2 / ifft2 on square power-of-two shapes."""
+
+    params = [_SHAPES_2D, _DTYPES_REDUCED]
+    param_names = ["shape", "dtype"]
+
+    def peakmem_fft2(self, shape, dtype):
+        mkl_fft.fft2(self.x)
+
+    def peakmem_ifft2(self, shape, dtype):
+        mkl_fft.ifft2(self.x)
+
+
+# ---------------------------------------------------------------------------
+# N-D complex FFT (3-D)
+# ---------------------------------------------------------------------------
+
+
+class PeakMemFFTnD(BenchC2C):
+    """Peak RSS of fftn / ifftn on cubic 3-D power-of-two shapes."""
+
+    params = [_SHAPES_3D, _DTYPES_REDUCED]
+    param_names = ["shape", "dtype"]
+
+    def peakmem_fftn(self, shape, dtype):
+        mkl_fft.fftn(self.x)
+
+    def peakmem_ifftn(self, shape, dtype):
+        mkl_fft.ifftn(self.x)
diff --git a/benchmarks/benchmarks/bench_numpy_fft.py b/benchmarks/benchmarks/bench_numpy_fft.py
new file mode 100644
index 0000000..5bb7f72
--- /dev/null
+++ b/benchmarks/benchmarks/bench_numpy_fft.py
@@ -0,0 +1,157 @@
+"""Benchmarks for mkl_fft.interfaces.numpy_fft.
+
+Covers every function exported by the interface:
+  fft / ifft        — 1-D C2C
+  rfft / irfft      — 1-D R2C / C2R
+  hfft / ihfft      — 1-D Hermitian
+  fft2 / ifft2      — 2-D C2C
+  rfft2 / irfft2    — 2-D R2C / C2R
+  fftn / ifftn      — N-D C2C
+  rfftn / irfftn    — N-D R2C / C2R
+"""
+
+from mkl_fft.interfaces import numpy_fft
+
+from ._utils import (
+    _DTYPES_ALL,
+    _DTYPES_REAL,
+    _DTYPES_REDUCED,
+    _SHAPES_2D_IFACE,
+    _SHAPES_3D,
+    BenchC2C,
+    BenchR2C,
+)
+
+_SIZES_1D = [256, 1024, 16384]  # 2**8, 2**10, 2**14
+
+
+# ---------------------------------------------------------------------------
+# 1-D complex-to-complex
+# ---------------------------------------------------------------------------
+
+
+class BenchC2C1D(BenchC2C):
+    """Time numpy_fft.fft / ifft on 1-D input of every dtype in _DTYPES_ALL."""
+
+    params = [_SIZES_1D, _DTYPES_ALL]
+    param_names = ["n", "dtype"]
+
+    def time_fft(self, n, dtype):
+        numpy_fft.fft(self.x)
+
+    def time_ifft(self, n, dtype):
+        numpy_fft.ifft(self.x)
+
+
+# ---------------------------------------------------------------------------
+# 1-D real-to-complex / complex-to-real
+# ---------------------------------------------------------------------------
+
+
+class BenchRC1D(BenchR2C):
+    """Time numpy_fft.rfft (real input) and irfft (half-spectrum input)."""
+
+    params = [_SIZES_1D, _DTYPES_REAL]
+    param_names = ["n", "dtype"]
+
+    def time_rfft(self, n, dtype):
+        numpy_fft.rfft(self.x_real)
+
+    def time_irfft(self, n, dtype):
+        numpy_fft.irfft(self.x_complex, n=n)
+
+
+# ---------------------------------------------------------------------------
+# 1-D Hermitian
+# hfft:  input complex length n//2+1  →  output real length n
+# ihfft: input real  length n         →  output complex length n//2+1
+# ---------------------------------------------------------------------------
+
+
+class BenchHermitian1D(BenchR2C):
+    """Time numpy_fft.hfft / ihfft — 1-D Hermitian transforms.
+
+    *dtype* is the real dtype: hfft is fed the matching complex array
+    (``x_complex``), while ihfft is fed the real array (``x_real``).
+    """
+
+    params = [_SIZES_1D, _DTYPES_REAL]
+    param_names = ["n", "dtype"]
+
+    def time_hfft(self, n, dtype):
+        numpy_fft.hfft(self.x_complex, n=n)
+
+    def time_ihfft(self, n, dtype):
+        numpy_fft.ihfft(self.x_real)
+
+
+# ---------------------------------------------------------------------------
+# 2-D complex-to-complex
+# ---------------------------------------------------------------------------
+
+
+class BenchC2C2D(BenchC2C):
+    """Time numpy_fft.fft2 / ifft2 on square shapes (float64, complex128)."""
+
+    params = [_SHAPES_2D_IFACE, _DTYPES_REDUCED]
+    param_names = ["shape", "dtype"]
+
+    def time_fft2(self, shape, dtype):
+        numpy_fft.fft2(self.x)
+
+    def time_ifft2(self, shape, dtype):
+        numpy_fft.ifft2(self.x)
+
+
+# ---------------------------------------------------------------------------
+# 2-D real-to-complex / complex-to-real
+# ---------------------------------------------------------------------------
+
+
+class BenchRC2D(BenchR2C):
+    """Time numpy_fft.rfft2 (real input) and irfft2 (half-spectrum, s=shape)."""
+
+    params = [_SHAPES_2D_IFACE, _DTYPES_REAL]
+    param_names = ["shape", "dtype"]
+
+    def time_rfft2(self, shape, dtype):
+        numpy_fft.rfft2(self.x_real)
+
+    def time_irfft2(self, shape, dtype):
+        numpy_fft.irfft2(self.x_complex, s=shape)
+
+
+# ---------------------------------------------------------------------------
+# N-D complex-to-complex
+# ---------------------------------------------------------------------------
+
+
+class BenchC2CND(BenchC2C):
+    """Time numpy_fft.fftn / ifftn on 3-D cubes (float64, complex128)."""
+
+    params = [_SHAPES_3D, _DTYPES_REDUCED]
+    param_names = ["shape", "dtype"]
+
+    def time_fftn(self, shape, dtype):
+        numpy_fft.fftn(self.x)
+
+    def time_ifftn(self, shape, dtype):
+        numpy_fft.ifftn(self.x)
+
+
+# ---------------------------------------------------------------------------
+# N-D real-to-complex / complex-to-real
+# ---------------------------------------------------------------------------
+
+
+class BenchRCND(BenchR2C):
+    """Time numpy_fft.rfftn (real input) and irfftn (half-spectrum, s=shape)."""
+
+    params = [_SHAPES_3D, _DTYPES_REAL]
+    param_names = ["shape", "dtype"]
+
+    def time_rfftn(self, shape, dtype):
+        numpy_fft.rfftn(self.x_real)
+
+    def time_irfftn(self, shape, dtype):
+        numpy_fft.irfftn(self.x_complex, s=shape)
diff --git a/benchmarks/benchmarks/bench_scipy_fft.py b/benchmarks/benchmarks/bench_scipy_fft.py
new file mode 100644
index 0000000..e397cde
--- /dev/null
+++ b/benchmarks/benchmarks/bench_scipy_fft.py
@@ -0,0 +1,205 @@
+"""Benchmarks for mkl_fft.interfaces.scipy_fft.
+
+Covers every function exported by the interface:
+  fft / ifft          — 1-D C2C
+  rfft / irfft        — 1-D R2C / C2R
+  hfft / ihfft        — 1-D Hermitian
+  fft2 / ifft2        — 2-D C2C
+  rfft2 / irfft2      — 2-D R2C / C2R
+  hfft2 / ihfft2      — 2-D Hermitian  (scipy_fft only)
+  fftn / ifftn        — N-D C2C
+  rfftn / irfftn      — N-D R2C / C2R
+  hfftn / ihfftn      — N-D Hermitian  (scipy_fft only)
+"""
+
+from mkl_fft.interfaces import scipy_fft
+
+from ._utils import (
+    _DTYPES_ALL,
+    _DTYPES_REAL,
+    _DTYPES_REDUCED,
+    _SHAPES_2D_IFACE,
+    _SHAPES_3D,
+    BenchC2C,
+    BenchR2C,
+)
+
+_SIZES_1D = [256, 1024, 16384]  # 2**8, 2**10, 2**14
+
+
+# ---------------------------------------------------------------------------
+# 1-D complex-to-complex
+# ---------------------------------------------------------------------------
+
+
+class BenchC2C1D(BenchC2C):
+    """Time scipy_fft.fft / ifft on 1-D input of every dtype in _DTYPES_ALL."""
+
+    params = [_SIZES_1D, _DTYPES_ALL]
+    param_names = ["n", "dtype"]
+
+    def time_fft(self, n, dtype):
+        scipy_fft.fft(self.x)
+
+    def time_ifft(self, n, dtype):
+        scipy_fft.ifft(self.x)
+
+
+# ---------------------------------------------------------------------------
+# 1-D real-to-complex / complex-to-real
+# ---------------------------------------------------------------------------
+
+
+class BenchRC1D(BenchR2C):
+    """Time scipy_fft.rfft (real input) and irfft (half-spectrum input)."""
+
+    params = [_SIZES_1D, _DTYPES_REAL]
+    param_names = ["n", "dtype"]
+
+    def time_rfft(self, n, dtype):
+        scipy_fft.rfft(self.x_real)
+
+    def time_irfft(self, n, dtype):
+        scipy_fft.irfft(self.x_complex, n=n)
+
+
+# ---------------------------------------------------------------------------
+# 1-D Hermitian
+# hfft:  input complex length n//2+1  →  output real length n
+# ihfft: input real  length n         →  output complex length n//2+1
+# ---------------------------------------------------------------------------
+
+
+class BenchHermitian1D(BenchR2C):
+    """Time scipy_fft.hfft / ihfft — 1-D Hermitian transforms.
+
+    *dtype* is the real dtype; hfft is fed the matching complex array
+    (complex64 for float32, otherwise complex128) built in setup().
+    """
+
+    params = [_SIZES_1D, _DTYPES_REAL]
+    param_names = ["n", "dtype"]
+
+    def time_hfft(self, n, dtype):
+        scipy_fft.hfft(self.x_complex, n=n)
+
+    def time_ihfft(self, n, dtype):
+        scipy_fft.ihfft(self.x_real)
+
+
+# ---------------------------------------------------------------------------
+# 2-D complex-to-complex
+# ---------------------------------------------------------------------------
+
+
+class BenchC2C2D(BenchC2C):
+    """Time scipy_fft.fft2 / ifft2 on square shapes (float64, complex128)."""
+
+    params = [_SHAPES_2D_IFACE, _DTYPES_REDUCED]
+    param_names = ["shape", "dtype"]
+
+    def time_fft2(self, shape, dtype):
+        scipy_fft.fft2(self.x)
+
+    def time_ifft2(self, shape, dtype):
+        scipy_fft.ifft2(self.x)
+
+
+# ---------------------------------------------------------------------------
+# 2-D real-to-complex / complex-to-real
+# ---------------------------------------------------------------------------
+
+
+class BenchRC2D(BenchR2C):
+    """Time scipy_fft.rfft2 (real input) and irfft2 (half-spectrum, s=shape)."""
+
+    params = [_SHAPES_2D_IFACE, _DTYPES_REAL]
+    param_names = ["shape", "dtype"]
+
+    def time_rfft2(self, shape, dtype):
+        scipy_fft.rfft2(self.x_real)
+
+    def time_irfft2(self, shape, dtype):
+        scipy_fft.irfft2(self.x_complex, s=shape)
+
+
+# ---------------------------------------------------------------------------
+# 2-D Hermitian  (scipy_fft only — not in numpy_fft interface)
+# hfft2:  input complex shape (M, N//2+1)  →  output real shape (M, N)
+# ihfft2: input real  shape (M, N)         →  output complex shape (M, N//2+1)
+# ---------------------------------------------------------------------------
+
+
+class BenchHermitian2D(BenchR2C):
+    """Time scipy_fft.hfft2 / ihfft2 — 2-D Hermitian transforms.
+
+    *dtype* is real; hfft2 takes x_complex (s=shape), ihfft2 takes x_real.
+    """
+
+    params = [_SHAPES_2D_IFACE, _DTYPES_REAL]
+    param_names = ["shape", "dtype"]
+
+    def time_hfft2(self, shape, dtype):
+        scipy_fft.hfft2(self.x_complex, s=shape)
+
+    def time_ihfft2(self, shape, dtype):
+        scipy_fft.ihfft2(self.x_real)
+
+
+# ---------------------------------------------------------------------------
+# N-D complex-to-complex
+# ---------------------------------------------------------------------------
+
+
+class BenchC2CND(BenchC2C):
+    """Time scipy_fft.fftn / ifftn on 3-D cubes (float64, complex128)."""
+
+    params = [_SHAPES_3D, _DTYPES_REDUCED]
+    param_names = ["shape", "dtype"]
+
+    def time_fftn(self, shape, dtype):
+        scipy_fft.fftn(self.x)
+
+    def time_ifftn(self, shape, dtype):
+        scipy_fft.ifftn(self.x)
+
+
+# ---------------------------------------------------------------------------
+# N-D real-to-complex / complex-to-real
+# ---------------------------------------------------------------------------
+
+
+class BenchRCND(BenchR2C):
+    """Time scipy_fft.rfftn (real input) and irfftn (half-spectrum, s=shape)."""
+
+    params = [_SHAPES_3D, _DTYPES_REAL]
+    param_names = ["shape", "dtype"]
+
+    def time_rfftn(self, shape, dtype):
+        scipy_fft.rfftn(self.x_real)
+
+    def time_irfftn(self, shape, dtype):
+        scipy_fft.irfftn(self.x_complex, s=shape)
+
+
+# ---------------------------------------------------------------------------
+# N-D Hermitian  (scipy_fft only)
+# hfftn:  input complex, last axis length s[-1]//2+1  →  output real shape s
+# ihfftn: input real  shape s  →  output complex, last axis length s[-1]//2+1
+# ---------------------------------------------------------------------------
+
+
+class BenchHermitianND(BenchR2C):
+    """Time scipy_fft.hfftn / ihfftn — N-D (3-D shapes) Hermitian transforms.
+
+    *dtype* is real; hfftn takes x_complex (s=shape), ihfftn takes x_real.
+    """
+
+    params = [_SHAPES_3D, _DTYPES_REAL]
+    param_names = ["shape", "dtype"]
+
+    def time_hfftn(self, shape, dtype):
+        scipy_fft.hfftn(self.x_complex, s=shape)
+
+    def time_ihfftn(self, shape, dtype):
+        scipy_fft.ihfftn(self.x_real)