llvm
diff --git a/examples/ingress/convert-kernel-bench-to-mlir.py b/examples/ingress/convert-kernel-bench-to-mlir.py
Lines changed: 2 additions & 2 deletions
diff --git a/examples/llama/test_llama3.py b/examples/llama/test_llama3.py
Lines changed: 28 additions & 30 deletions
diff --git a/examples/mlir/compile_and_run.py b/examples/mlir/compile_and_run.py
Lines changed: 2 additions & 2 deletions
diff --git a/examples/workload/example.py b/examples/workload/example.py
Lines changed: 7 additions & 14 deletions
diff --git a/examples/workload/example_mlir.py b/examples/workload/example_mlir.py
Lines changed: 13 additions & 18 deletions
@@ -15,7 +15,7 @@
 from typing import Iterable
 
 from mlir import ir, passmanager
-from lighthouse.ingress import torch as torch_ingress
+import lighthouse.ingress as lh_ingress
 
 project_root = Path(__file__).parent.parent.parent
 torch_kernels_dir = project_root / "third_party" / "KernelBench" / "KernelBench"
@@ -173,7 +173,7 @@ def process_task(task: KernelConversionTask):
     print("Processing:", kernel_relative_name)
 
     try:
-        mlir_kernel = torch_ingress.import_from_file(task.torch_path, ir_context=ctx)
+        mlir_kernel = lh_ingress.torch.import_from_file(task.torch_path, ir_context=ctx)
         assert isinstance(mlir_kernel, ir.Module)
     except Exception as e:
         print(
 
@@ -6,17 +6,16 @@
 import pytest
 import torch
 
-
 from mlir import ir
 from mlir.dialects import transform, func, linalg, tensor, arith, complex, math
 from mlir.dialects.linalg import ElementwiseKind
 from mlir.dialects.transform import structured, bufferization, interpreter
 from mlir.passmanager import PassManager
-from mlir.runtime.np_to_memref import (
-    get_ranked_memref_descriptor,
-)
+from mlir.runtime.np_to_memref import get_ranked_memref_descriptor
 from mlir.execution_engine import ExecutionEngine
 
+from lighthouse import utils as lh_utils
+
 from ref_model import (
     Attention,
     ModelArgs,
@@ -26,7 +25,6 @@
     TransformerBlock,
     Transformer,
 )
-from lighthouse.utils.runtime import ffi as ffi_utils, torch as torch_utils
 
 
 def with_mlir_ctx_and_location(func):
@@ -1021,7 +1019,7 @@ def bin_op(a, b, out):
     eng = ExecutionEngine(module, opt_level=2)
     func_ptr = eng.lookup("bin_op")
 
-    torch_dtype = torch_utils.mlir_type_to_torch_dtype(ir_type)
+    torch_dtype = lh_utils.torch.dtype_from_mlir_type(ir_type)
     a = torch.randn(*shape, dtype=torch_dtype)
     b = torch.randn(*shape, dtype=torch_dtype)
     out_ref = references[op](a, b)
@@ -1031,7 +1029,7 @@ def bin_op(a, b, out):
     a_mem = get_ranked_memref_descriptor(a.numpy())
     b_mem = get_ranked_memref_descriptor(b.numpy())
     out_mem = get_ranked_memref_descriptor(out.numpy())
-    args = ffi_utils.memrefs_to_packed_args([a_mem, b_mem, out_mem])
+    args = lh_utils.memref.to_packed_args([a_mem, b_mem, out_mem])
     func_ptr(args)
 
     assert torch.allclose(out, out_ref, rtol=0.01, atol=0.01, equal_nan=True)
@@ -1077,14 +1075,14 @@ def unary_op(a, out):
     eng = ExecutionEngine(module, opt_level=2)
     func_ptr = eng.lookup("unary_op")
 
-    torch_dtype = torch_utils.mlir_type_to_torch_dtype(ir_type)
+    torch_dtype = lh_utils.torch.dtype_from_mlir_type(ir_type)
     a = torch.randn(*shape, dtype=torch_dtype)
     out_ref = references[op](a)
     out = torch.empty_like(out_ref)
 
     a_mem = get_ranked_memref_descriptor(a.numpy())
     out_mem = get_ranked_memref_descriptor(out.numpy())
-    args = ffi_utils.memrefs_to_packed_args([a_mem, out_mem])
+    args = lh_utils.memref.to_packed_args([a_mem, out_mem])
     func_ptr(args)
 
     assert torch.allclose(out, out_ref, rtol=0.01, atol=0.01, equal_nan=True)
@@ -1113,13 +1111,13 @@ def rms_norm(a, out):
 
     eng = ExecutionEngine(module, opt_level=2)
     func_ptr = eng.lookup("rms_norm")
-    torch_dtype = torch_utils.mlir_type_to_torch_dtype(ir_type)
+    torch_dtype = lh_utils.torch.dtype_from_mlir_type(ir_type)
     a = torch.randn(*shape, dtype=torch_dtype)
     out_ref = references[get_l2_norm](a, eps)
     out = torch.empty_like(out_ref)
     a_mem = get_ranked_memref_descriptor(a.numpy())
     out_mem = get_ranked_memref_descriptor(out.numpy())
-    args = ffi_utils.memrefs_to_packed_args([a_mem, out_mem])
+    args = lh_utils.memref.to_packed_args([a_mem, out_mem])
     func_ptr(args)
 
     assert torch.allclose(out, out_ref, rtol=0.01, atol=0.01, equal_nan=True)
@@ -1161,7 +1159,7 @@ def linear_op(x, w, b, out):
 
     eng = ExecutionEngine(module, opt_level=2)
     func_ptr = eng.lookup("linear_op")
-    torch_dtype = torch_utils.mlir_type_to_torch_dtype(ir_type)
+    torch_dtype = lh_utils.torch.dtype_from_mlir_type(ir_type)
     x = torch.randn(*shape, in_features, dtype=torch_dtype)
     w = torch.randn(out_features, in_features, dtype=torch_dtype)
     b = torch.randn(out_features, dtype=torch_dtype)
@@ -1172,7 +1170,7 @@ def linear_op(x, w, b, out):
     w_mem = get_ranked_memref_descriptor(w.numpy())
     b_mem = get_ranked_memref_descriptor(b.numpy())
     out_mem = get_ranked_memref_descriptor(out.numpy())
-    args = ffi_utils.memrefs_to_packed_args([x_mem, w_mem, b_mem, out_mem])
+    args = lh_utils.memref.to_packed_args([x_mem, w_mem, b_mem, out_mem])
     func_ptr(args)
     assert torch.allclose(out, out_ref, rtol=0.01, atol=0.01, equal_nan=True)
 
@@ -1202,15 +1200,15 @@ def polar_op(magnitude, angle, out):
 
     eng = ExecutionEngine(module, opt_level=2)
     func_ptr = eng.lookup("polar_op")
-    torch_dtype = torch_utils.mlir_type_to_torch_dtype(ir_type)
+    torch_dtype = lh_utils.torch.dtype_from_mlir_type(ir_type)
     magnitude = torch.randn(4, 16, dtype=torch_dtype)
     angle = torch.randn(4, 16, dtype=torch_dtype)
     out_ref = references[get_polar](magnitude, angle)
     out = torch.empty_like(out_ref)
     magnitude_mem = get_ranked_memref_descriptor(magnitude.numpy())
     angle_mem = get_ranked_memref_descriptor(angle.numpy())
     out_mem = get_ranked_memref_descriptor(out.numpy())
-    args = ffi_utils.memrefs_to_packed_args([magnitude_mem, angle_mem, out_mem])
+    args = lh_utils.memref.to_packed_args([magnitude_mem, angle_mem, out_mem])
     func_ptr(args)
     assert torch.allclose(out, out_ref, rtol=0.01, atol=0.01, equal_nan=True)
 
@@ -1238,14 +1236,14 @@ def repeat_kv_op(x, out):
     eng = ExecutionEngine(module, opt_level=2)
     func_ptr = eng.lookup("repeat_kv_op")
 
-    torch_dtype = torch_utils.mlir_type_to_torch_dtype(ir_type)
+    torch_dtype = lh_utils.torch.dtype_from_mlir_type(ir_type)
     x = torch.randn(2, 512, 8, 64, dtype=torch_dtype)
     out_ref = references[get_repeat_kv](x, n_rep)
     out = torch.empty_like(out_ref)
 
     x_mem = get_ranked_memref_descriptor(x.numpy())
     out_mem = get_ranked_memref_descriptor(out.numpy())
-    args = ffi_utils.memrefs_to_packed_args([x_mem, out_mem])
+    args = lh_utils.memref.to_packed_args([x_mem, out_mem])
     func_ptr(args)
 
     assert torch.allclose(out, out_ref, rtol=0.01, atol=0.01, equal_nan=True)
@@ -1276,7 +1274,7 @@ def reshape_for_broadcast_op(freqs_cis, x, out):
     eng = ExecutionEngine(module, opt_level=2)
     func_ptr = eng.lookup("reshape_for_broadcast")
 
-    torch_dtype = torch_utils.mlir_type_to_torch_dtype(ir_type)
+    torch_dtype = lh_utils.torch.dtype_from_mlir_type(ir_type)
     freqs_cis = torch.randn(512, 64, dtype=torch_dtype)
     x = torch.randn(2, 512, 32, 128, dtype=torch_dtype)
     # Convert x to complex view as expected by reshape_for_broadcast
@@ -1287,7 +1285,7 @@ def reshape_for_broadcast_op(freqs_cis, x, out):
     freqs_cis_mem = get_ranked_memref_descriptor(freqs_cis.numpy())
     x_mem = get_ranked_memref_descriptor(x.numpy())
     out_mem = get_ranked_memref_descriptor(out.numpy())
-    args = ffi_utils.memrefs_to_packed_args([freqs_cis_mem, x_mem, out_mem])
+    args = lh_utils.memref.to_packed_args([freqs_cis_mem, x_mem, out_mem])
     func_ptr(args)
 
     assert torch.allclose(out, out_ref, rtol=0.01, atol=0.01, equal_nan=True)
@@ -1318,15 +1316,15 @@ def view_as_complex_op(x, out):
     eng = ExecutionEngine(module, opt_level=2)
     func_ptr = eng.lookup("view_as_complex_op")
 
-    torch_dtype = torch_utils.mlir_type_to_torch_dtype(ir_type)
+    torch_dtype = lh_utils.torch.dtype_from_mlir_type(ir_type)
     x = torch.randn(2, 512, 32, 128, dtype=torch_dtype)
     x_reshaped = x.reshape(2, 512, 32, 64, 2)
     out_ref = torch.view_as_complex(x_reshaped)
     out = torch.empty_like(out_ref)
 
     x_mem = get_ranked_memref_descriptor(x_reshaped.numpy())
     out_mem = get_ranked_memref_descriptor(out.numpy())
-    args = ffi_utils.memrefs_to_packed_args([x_mem, out_mem])
+    args = lh_utils.memref.to_packed_args([x_mem, out_mem])
     func_ptr(args)
 
     assert torch.allclose(out, out_ref, rtol=0.01, atol=0.01, equal_nan=True)
@@ -1354,15 +1352,15 @@ def as_real_op(x, out):
     eng = ExecutionEngine(module, opt_level=2)
     func_ptr = eng.lookup("as_real_op")
 
-    torch_dtype = torch_utils.mlir_type_to_torch_dtype(ir_type)
+    torch_dtype = lh_utils.torch.dtype_from_mlir_type(ir_type)
     x = torch.randn(2, 512, 32, 64, 2, dtype=torch_dtype)
     x_complex = torch.view_as_complex(x)
     out_ref = torch.view_as_real(x_complex)
     out = torch.empty_like(out_ref)
 
     x_mem = get_ranked_memref_descriptor(x_complex.numpy())
     out_mem = get_ranked_memref_descriptor(out.numpy())
-    args = ffi_utils.memrefs_to_packed_args([x_mem, out_mem])
+    args = lh_utils.memref.to_packed_args([x_mem, out_mem])
     func_ptr(args)
 
     assert torch.allclose(out, out_ref, rtol=0.01, atol=0.01, equal_nan=True)
@@ -1395,7 +1393,7 @@ def rotary_emb(xq, xk, freqs_cis, xq_out, xk_out):
         return module
 
     ir_type = to_ir_type(elem_type)
-    torch_dtype = torch_utils.mlir_type_to_torch_dtype(ir_type)
+    torch_dtype = lh_utils.torch.dtype_from_mlir_type(ir_type)
     xq_shape = (batch_size, seq_len, n_heads, head_dim)
     xk_shape = (batch_size, seq_len, n_kv_heads, head_dim)
     freqs_cis_shape = (seq_len, head_dim // 2)
@@ -1424,7 +1422,7 @@ def rotary_emb(xq, xk, freqs_cis, xq_out, xk_out):
     freqs_cis_mem = get_ranked_memref_descriptor(freqs_cis.numpy())
     out1_mem = get_ranked_memref_descriptor(out1.numpy())
     out2_mem = get_ranked_memref_descriptor(out2.numpy())
-    args = ffi_utils.memrefs_to_packed_args(
+    args = lh_utils.memref.to_packed_args(
         [a_mem, b_mem, freqs_cis_mem, out1_mem, out2_mem]
     )
     func_ptr(args)
@@ -1489,7 +1487,7 @@ def feed_forward(x, w1, b1, w2, b2, w3, b3, out):
     eng = ExecutionEngine(module, opt_level=2)
     func_ptr = eng.lookup("feed_forward")
 
-    torch_dtype = torch_utils.mlir_type_to_torch_dtype(ir_type)
+    torch_dtype = lh_utils.torch.dtype_from_mlir_type(ir_type)
     x = torch.randn(4, 16, dtype=torch_dtype)
     w1 = torch.randn(64, 16, dtype=torch_dtype)
     b1 = torch.randn(64, dtype=torch_dtype)
@@ -1512,7 +1510,7 @@ def feed_forward(x, w1, b1, w2, b2, w3, b3, out):
     w3_mem = get_ranked_memref_descriptor(w3.numpy())
     b3_mem = get_ranked_memref_descriptor(b3.numpy())
     out_mem = get_ranked_memref_descriptor(out.numpy())
-    args = ffi_utils.memrefs_to_packed_args(
+    args = lh_utils.memref.to_packed_args(
         [x_mem, w1_mem, b1_mem, w2_mem, b2_mem, w3_mem, b3_mem, out_mem]
     )
     func_ptr(args)
@@ -1645,7 +1643,7 @@ def attention_op(x, wq, wk, wv, wo, freqs_cis, mask, out):
     freqs_cis_mem = get_ranked_memref_descriptor(freqs_cis_real.numpy())
     mask_mem = get_ranked_memref_descriptor(mask.numpy())
     out_mem = get_ranked_memref_descriptor(out.numpy())
-    args = ffi_utils.memrefs_to_packed_args(
+    args = lh_utils.memref.to_packed_args(
         [x_mem, wq_mem, wk_mem, wv_mem, wo_mem, freqs_cis_mem, mask_mem, out_mem]
     )
     func_ptr(args)
@@ -1792,7 +1790,7 @@ def transformer_block_op(
     b3_mem = get_ranked_memref_descriptor(b3.numpy())
     out_mem = get_ranked_memref_descriptor(out.numpy())
 
-    args = ffi_utils.memrefs_to_packed_args(
+    args = lh_utils.memref.to_packed_args(
         [
             x_mem,
             wq_mem,
@@ -1981,7 +1979,7 @@ def transformer_op(*params):
     out_mem = get_ranked_memref_descriptor(out.numpy())
     memrefs.append(out_mem)
 
-    args = ffi_utils.memrefs_to_packed_args(memrefs)
+    args = lh_utils.memref.to_packed_args(memrefs)
     func_ptr(args)
 
     assert torch.allclose(out, out_ref, rtol=0.01, atol=0.01, equal_nan=True)
@@ -10,7 +10,7 @@
 from mlir.execution_engine import ExecutionEngine
 from mlir.passmanager import PassManager
 
-from lighthouse.utils.runtime import torch as torch_utils
+import lighthouse.utils as lh_utils
 
 
 def create_kernel(ctx: ir.Context) -> ir.Module:
@@ -168,7 +168,7 @@ def main(args):
     out = torch.empty_like(out_ref)
 
     # Execute the kernel.
-    args = torch_utils.torch_to_packed_args([a, b, out])
+    args = lh_utils.torch.to_packed_args([a, b, out])
     add_func(args)
 
     ### Verification ###
 
@@ -5,27 +5,20 @@
 """
 Workload example: Element-wise sum of two (M, N) float32 arrays on CPU.
 """
+import ctypes
+from contextlib import contextmanager
+from functools import cached_property
+from typing import Optional
 
 import numpy as np
 from mlir import ir
 from mlir.runtime.np_to_memref import get_ranked_memref_descriptor
 from mlir.dialects import func, linalg, bufferization
 from mlir.dialects import transform
 from mlir.execution_engine import ExecutionEngine
-from contextlib import contextmanager
-from functools import cached_property
-import ctypes
-from typing import Optional
-from lighthouse.utils.mlir import (
-    apply_registered_pass,
-    canonicalize,
-    match,
-)
-from lighthouse.workload import (
-    Workload,
-    execute,
-    benchmark,
-)
+
+from lighthouse.utils.mlir import apply_registered_pass, canonicalize, match
+from lighthouse.workload import Workload, execute, benchmark
 
 
 class ElementwiseSum(Workload):
 
@@ -8,6 +8,9 @@
 In this example, allocation and deallocation of input arrays is done in MLIR.
 """
 
+import ctypes
+from contextlib import contextmanager
+
 import numpy as np
 from mlir import ir
 from mlir.runtime.np_to_memref import (
@@ -17,18 +20,11 @@
 )
 from mlir.dialects import func, linalg, arith, memref
 from mlir.execution_engine import ExecutionEngine
-import ctypes
-from contextlib import contextmanager
-from lighthouse.utils.runtime.ffi import (
-    get_packed_arg,
-    memrefs_to_packed_args,
-    memref_to_ctype,
-)
+
+from lighthouse.workload import execute, benchmark
+import lighthouse.utils as lh_utils
+
 from example import ElementwiseSum
-from lighthouse.workload import (
-    execute,
-    benchmark,
-)
 
 
 def emit_host_alloc(suffix: str, element_type: ir.Type, rank: int = 2):
@@ -114,16 +110,16 @@ def _allocate_array(
         # construct a memref descriptor for the result memref
         shape = (self.M, self.N)
         mref = make_nd_memref_descriptor(len(shape), as_ctype(self.dtype))()
-        ptr_mref = memref_to_ctype(mref)
+        ptr_mref = lh_utils.memref.to_ctype(mref)
         ptr_dims = [ctypes.pointer(ctypes.c_int32(d)) for d in shape]
-        alloc_func(get_packed_arg([ptr_mref, *ptr_dims]))
+        alloc_func(lh_utils.memref.get_packed_arg([ptr_mref, *ptr_dims]))
         self.memrefs[name] = mref
         return mref
 
     def _deallocate_all(self, execution_engine: ExecutionEngine):
         for mref in self.memrefs.values():
             dealloc_func = execution_engine.lookup("host_dealloc_f32")
-            dealloc_func(memrefs_to_packed_args([mref]))
+            dealloc_func(lh_utils.memref.to_packed_args([mref]))
         self.memrefs = {}
 
     def get_input_arrays(
@@ -136,10 +132,9 @@ def get_input_arrays(
         # initialize with MLIR
         fill_zero_func = execution_engine.lookup("host_fill_constant_zero_f32")
         fill_random_func = execution_engine.lookup("host_fill_random_f32")
-        fill_zero_func(memrefs_to_packed_args([C]))
-        fill_random_func(memrefs_to_packed_args([A]))
-        fill_random_func(memrefs_to_packed_args([B]))
-
+        fill_zero_func(lh_utils.memref.to_packed_args([C]))
+        fill_random_func(lh_utils.memref.to_packed_args([A]))
+        fill_random_func(lh_utils.memref.to_packed_args([B]))
         return [A, B, C]
 
     @contextmanager