Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
46 changes: 42 additions & 4 deletions backends/nxp/backend/edge_program_converter.py
Original file line number Diff line number Diff line change
Expand Up @@ -73,22 +73,28 @@ class EdgeProgramToIRConverter:
_default_target_spec = NeutronTargetSpec("imxrt700")
_default_delegation_options = CustomDelegationOptions()

def __init__(self):
    # Maps edge debug handles to tuples of TFLite operator indices.
    # It is reset and refilled on every `convert_program()` call.
    self.edge_to_tflite_map = {}

def convert_program(
self,
edge_program: ExportedProgram,
conversion_config: ConversionConfig = _default_conversion_config,
neutron_target_spec: NeutronTargetSpec = _default_target_spec,
custom_delegation_options: CustomDelegationOptions = _default_delegation_options,
) -> tuple[bytes, dict[str, dict[str, DataFormat]]]:
) -> tuple[bytes, dict[str, dict[str, DataFormat]], dict[int, tuple[int, ...]]]:
"""
Convert ExportedProgram in Edge dialect to IR (TFLite flatbuffers) as bytes.

:param edge_program: Converter ExportedProgram.
:param conversion_config: ConversionConfig instance.
:param neutron_target_spec: Object for querying the target platform to retrieve its properties.
:param custom_delegation_options: Custom user options which affect node delegation.
:return: TFLite flatbuffers as bytes.
:return: TFLite flatbuffers as bytes, I/O formats, and edge-to-tflite mapping.
"""
# Reset the edge to tflite map for each conversion
self.edge_to_tflite_map = {}

parameters_mapping = self.map_inputs_to_parameters(edge_program)
dim_order_map = self.map_nodes_to_dim_order(edge_program)

Expand All @@ -112,14 +118,17 @@ def convert_program(
# Apply optimizations and finalize the model.
internal_tflite_model = cc.tflite_builder.finish()

# Get the final edge to tflite mapping after optimization
self.edge_to_tflite_map = cc.tflite_builder.edge_to_tflite_map

# Extract the formats of the model's inputs and outputs.
io_formats = cc.tflite_builder.get_io_formats(edge_program.graph_signature)

# TFLite model generation
flatbuffers_builder = flatbuffers.Builder()
internal_tflite_model.gen_tflite(flatbuffers_builder)

return bytes(flatbuffers_builder.Output()), io_formats
return bytes(flatbuffers_builder.Output()), io_formats, self.edge_to_tflite_map

@staticmethod
def append_placeholders_and_tensors(nodes: list[Node], context: ConversionContext):
Expand Down Expand Up @@ -161,7 +170,6 @@ def _process_nodes(self, nodes: list[Node], conversion_context: ConversionContex
exir_ops.edge.quantized_decomposed.dequantize_per_channel.default,
exir_ops.edge.quantized_decomposed.quantize_per_tensor.default,
]

for node in nodes:
if node.op == "call_function":
if node.target in qdq_related_functions and "cluster" in node.meta:
Expand All @@ -173,7 +181,37 @@ def _process_nodes(self, nodes: list[Node], conversion_context: ConversionContex
# The node was already processed alongside the Q/DQ ops.
pass
elif node.target in functions_converters:
# Get TFLite op count BEFORE conversion
tflite_op_count_before = len(
conversion_context.tflite_builder.get_operators().vector
)
# Convert the node
functions_converters[node.target](conversion_context).convert(node)
# Get TFLite op count AFTER conversion
tflite_op_count_after = len(
conversion_context.tflite_builder.get_operators().vector
)

# Track the mapping - store edge debug handle in operators.
# Get the edge debug handle so it can be associated with newly created operators.
edge_debug_handle = node.meta.get("debug_handle", None)
if (
edge_debug_handle is not None
and tflite_op_count_after > tflite_op_count_before
):
operators = (
conversion_context.tflite_builder.get_operators().vector
)
# Node converters append new operators to the TFLite builder.
# Only operators added during this conversion step (from "before" to "after")
# are tagged with the current edge_debug_handle.
for i in range(tflite_op_count_before, tflite_op_count_after):
# Store edge debug handle in operator's temporary attribute
operators[i].tmp_edge_debug_handle = edge_debug_handle
logger.d(
f"Tagged TFLite ops {list(range(tflite_op_count_before, tflite_op_count_after))} with edge debug_handle={edge_debug_handle} for node '{node.name}'"
)

else:
logger.e(
logger.Code.NOT_IMPLEMENTED,
Expand Down
31 changes: 31 additions & 0 deletions backends/nxp/backend/ir/converter/builder/model_builder.py
Original file line number Diff line number Diff line change
Expand Up @@ -85,6 +85,10 @@ class ModelBuilder:

conversion_config: ConversionConfig

edge_to_tflite_map: dict[
int, tuple[int, ...]
] # Mapping edge debug handles to tuple of TFLite operator indices

_default_conversion_config = ConversionConfig()

def __init__(
Expand All @@ -105,6 +109,7 @@ def __init__(
self._nchw_tensor_version = {}
self._skipped_output_map = {}
self._zeros_tensor_map = {}
self.edge_to_tflite_map = {}

def create_zeros_tensor(
self, dims: List[int], name: str, dtype: np.dtype, can_reuse: bool = False
Expand Down Expand Up @@ -503,6 +508,9 @@ def finish(self) -> tflite_model.Model:
self.conversion_config.optimization_blacklist,
)

# Create the final edge-to-tflite mapping after model optimization
self._create_edge_to_tflite_mapping()

self._keep_one_empty_buffer()

# Remove outputs, which are not produced by any node. Otherwise, there would be errors after inference.
Expand All @@ -524,6 +532,29 @@ def finish(self) -> tflite_model.Model:

return self._tfl_model

def _create_edge_to_tflite_mapping(self):
    """Build the edge-to-TFLite operator mapping and store it in `self.edge_to_tflite_map`.

    Each operator whose `tmp_edge_debug_handle` is set contributes its index in the operator vector
     to the entry of that debug handle. Call this only after all model optimizations have been applied,
     so that the recorded indices match the output TFLite model.
    """
    indices_per_handle = {}
    for op_index, operator in enumerate(self.get_operators().vector):
        handle = getattr(operator, "tmp_edge_debug_handle", None)
        if handle is None:
            # No edge debug handle is recorded on this operator.
            continue
        indices_per_handle.setdefault(handle, []).append(op_index)

    # Freeze the collected index lists into tuples.
    self.edge_to_tflite_map = {
        handle: tuple(indices) for handle, indices in indices_per_handle.items()
    }
    logger.i(
        f"\nFinal edge_to_tflite_map after optimization: {self.edge_to_tflite_map}"
    )

def _assign_io_tensor_indices(self, inputs, outputs, allow_inputs_stripping: bool):
for tensor in outputs.tmp_outputs:
try:
Expand Down
5 changes: 5 additions & 0 deletions backends/nxp/backend/ir/tflite_generator/tflite_model.py
Original file line number Diff line number Diff line change
Expand Up @@ -514,6 +514,9 @@ class Operator(meta.TFLiteObject):
# If `True`, this is an extra operator added during conversion. It was not present in the original input model.
tmp_added_extra: bool

# Edge program debug handle for mapping edge nodes to TFLite operators
tmp_edge_debug_handle: Optional[int]

def __init__(
self,
inputs: OperatorInputs = None,
Expand Down Expand Up @@ -541,6 +544,8 @@ def __init__(
self.tmp_version = 1
self.tmp_added_extra = False

self.tmp_edge_debug_handle = None

def uses_per_channel_quantization(self) -> bool:
"""Determine if this operator uses per-channel quantization."""
for tensor in itertools.chain(self.tmp_inputs, self.tmp_outputs):
Expand Down
12 changes: 12 additions & 0 deletions backends/nxp/backend/neutron_converter_manager.py
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,15 @@ def _build_compilation_context(compilation_opts):
cctx.compilationOpts.dumpKernelSelectionCode = compilation_opts[
"dumpKernelSelectionCode"
]
if (
hasattr(cctx.compilationOpts, "useProfiling")
and compilation_opts["useProfiling"]
):
cctx.compilationOpts.useProfiling = compilation_opts["useProfiling"]
cctx.compilationOpts.dumpAfterImport = "console"
cctx.compilationOpts.dumpAfterGenerate = "console"
cctx.compilationOpts.verbose = compilation_opts["useProfiling"]

return cctx


Expand Down Expand Up @@ -81,6 +90,7 @@ def convert(
target: str,
delegation_tag: str,
fetch_constants_to_sram: bool = False,
use_profiling: bool = False,
) -> bytes:
"""
Call Neutron Converter.
Expand All @@ -89,6 +99,7 @@ def convert(
:param target: The target platform.
:param delegation_tag: The delegation tag of model partition.
:param fetch_constants_to_sram: Add microcode that fetches weights from external memory.
This allows running models which do not fit into SRAM. Applies to Neutron-C only (microcontrollers).
:param use_profiling: Use profiling for neutron delegated model.

:return: TFLite model with Neutron microcode as bytes.
Expand All @@ -102,6 +113,7 @@ def convert(
"excludeGraphPasses": "HoistSliceAboveTranspose,MergeTranspose",
"fetchConstantsToSRAM": fetch_constants_to_sram,
"dumpKernelSelectionCode": self.dump_kernel_selection_code,
"useProfiling": use_profiling,
}

# Try to use multiprocessing for isolation, but fall back to direct execution
Expand Down
Loading
Loading