|
48 | 48 | "source": [ |
49 | 49 | "import torch\n", |
50 | 50 | "\n", |
51 | | - "class Add(torch.nn.Module):\n", |
| 51 | + "class AddSigmoid(torch.nn.Module):\n", |
| 52 | + " def __init__(self):\n", |
| 53 | + " super().__init__()\n", |
| 54 | + " self.sigmoid = torch.nn.Sigmoid()\n", |
| 55 | + "\n", |
52 | 56 | " def forward(self, x: torch.Tensor, y: torch.Tensor) -> torch.Tensor:\n", |
53 | | - " return x + y\n", |
| 57 | + " return self.sigmoid(x + y)\n", |
54 | 58 | "\n", |
55 | 59 | "example_inputs = (torch.ones(1,1,1,1),torch.ones(1,1,1,1))\n", |
56 | 60 | "\n", |
57 | | - "model = Add()\n", |
| 61 | + "model = AddSigmoid()\n", |
58 | 62 | "model = model.eval()\n", |
59 | 63 | "exported_program = torch.export.export(model, example_inputs)\n", |
60 | 64 | "graph_module = exported_program.graph_module\n", |
|
84 | 88 | "source": [ |
85 | 89 | "from executorch.backends.arm.vgf import VgfCompileSpec\n", |
86 | 90 | "\n", |
87 | | - "# Create a compilation spec describing the floating point target.\n", |
88 | | - "compile_spec = VgfCompileSpec(\"TOSA-1.0+FP\")\n", |
| 91 | + "# Create a compilation spec describing the target\n", |
| 92 | + "compile_spec = VgfCompileSpec()\n", |
89 | 93 | "\n", |
90 | 94 | "_ = graph_module.print_readable()\n", |
91 | 95 | "\n", |
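From here, the usual ExecuTorch flow is to partition the exported program with the compile spec and lower it to the backend. A hedged sketch of that step, assuming the VGF backend exposes a `VgfPartitioner` alongside `VgfCompileSpec` (the exact symbol, and the notebook's later cells, may differ):

```python
from executorch.exir import to_edge_transform_and_lower

# Assumed import; check executorch.backends.arm.vgf for the exact name.
from executorch.backends.arm.vgf import VgfPartitioner

# Partition the graph for VGF and lower it to an ExecuTorch program.
executorch_program = to_edge_transform_and_lower(
    exported_program,
    partitioner=[VgfPartitioner(compile_spec)],
).to_executorch()
```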
|
99 | 103 | "source": [ |
100 | 104 | "To lower the graph_module for INT targets using the VGF backend, we apply the arm_quantizer. \n", |
101 | 105 | "\n", |
102 | | - "Quantization can be performed in various ways and tailored to different subgraphs; the sequence shown here represents the recommended workflow for VGF. \n", |
| 106 | + "Quantization can be performed in various ways and tailored to different subgraphs; it is even possible to opt out of quantization for selected layers and have them run in floating-point.\n", |
103 | 107 | "\n", |
104 | 108 | "This step also requires calibrating the module with representative inputs. \n", |
105 | 109 | "\n", |
|
120 | 124 | "from torchao.quantization.pt2e.quantize_pt2e import convert_pt2e, prepare_pt2e\n", |
121 | 125 | "\n", |
122 | 126 | "# Create a compilation spec describing the target for configuring the quantizer\n", |
123 | | - "compile_spec = VgfCompileSpec(\"TOSA-1.0+INT\")\n", |
| 127 | + "compile_spec = VgfCompileSpec()\n", |
124 | 128 | "\n", |
125 | 129 | "# Create and configure quantizer to use a symmetric quantization config globally on all nodes\n", |
126 | 130 | "quantizer = VgfQuantizer(compile_spec)\n", |
127 | 131 | "operator_config = get_symmetric_quantization_config(is_per_channel=False)\n", |
| 132 | + "\n", |
| 133 | + "# Set global (default) quantization config for the layers in the models.\n", |
| 134 | + "# Can also be set to `None` to let layers run in FP as default.\n", |
128 | 135 | "quantizer.set_global(operator_config)\n", |
129 | 136 | "\n", |
| 137 | + "# Skip quantizing all sigmoid ops (only one for this model); let it run in FP.\n", |
| 138 | + "# This step is optional; selecting which layers to include/exclude for\n", |
| 139 | + "# quantization is part of optimizing the model's performance.\n", |
| 140 | + "quantizer.set_module_type(torch.nn.Sigmoid, None)\n", |
| 141 | + "\n", |
130 | 142 | "# Post training quantization\n", |
131 | 143 | "quantized_graph_module = prepare_pt2e(graph_module, quantizer)\n", |
132 | 144 | "quantized_graph_module(*example_inputs) # Calibrate the graph module with the example input\n", |
|
142 | 154 | "cell_type": "markdown", |
143 | 155 | "metadata": {}, |
144 | 156 | "source": [ |
145 | | - "# In the example below, we will make use of the quantized graph module.\n", |
| 157 | + "# In the example below, we will make use of the (partially) quantized graph module.\n", |
146 | 158 | "\n", |
147 | 159 | "The lowering in the VGFBackend happens in five steps:\n", |
148 | 160 | "\n", |
|