1 change: 0 additions & 1 deletion .github/workflows/regression_test_aarch64.yml
@@ -51,7 +51,6 @@ jobs:
       - name: Run python tests
         run: |
           conda activate venv
-          pytest -s test/quantization/test_int8_dynamic_activation_intx_weight_config_v1.py
           pytest -s test/quantization/quantize_/workflows/intx/test_intx_opaque_tensor.py
           pytest -s test/prototype/test_embedding.py
           pytest -s test/prototype/test_int8_lut_tensor.py
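Note for context: the deleted CI entry exercised the `version=1` path of `Int8DynamicActivationIntxWeightConfig`, which this PR retires. Below is a minimal sketch of the surviving `version=2` usage, mirroring the `quantize_` call in the test diff further down; the layer shape, weight dtype, and group size are illustrative, and the import paths are assumptions based on torchao's public API:

```python
import torch
from torchao.quantization import Int8DynamicActivationIntxWeightConfig, quantize_
from torchao.quantization.granularity import PerGroup

# Illustrative model; the sizes are arbitrary.
model = torch.nn.Sequential(torch.nn.Linear(512, 256))
quantize_(
    model,
    Int8DynamicActivationIntxWeightConfig(
        weight_dtype=torch.int4,
        weight_granularity=PerGroup(32),
        version=2,  # the version=1 path is what this PR removes from CI
    ),
)
```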
12 changes: 0 additions & 12 deletions test/integration/test_load_and_run_checkpoint.py
@@ -41,12 +41,6 @@
         1,
         "IntxWeightOnlyConfig",
     ),
-    # model card: https://huggingface.co/torchao-testing/single-linear-Int8DynamicActivationIntxWeightConfig-v1-0.14.dev
-    (
-        "torchao-testing/single-linear-Int8DynamicActivationIntxWeightConfig-v1-0.14.dev",
-        1,
-        "Int8DynamicActivationIntxWeightConfig",
-    ),
 ]
 
 _DEPRECATED_MODEL_INFO = [
@@ -62,12 +56,6 @@
         1,
         "IntxWeightOnlyConfig",
     ),
-    # https://huggingface.co/torchao-testing/opt-125m-Int8DynamicActivationIntxWeightConfig-v1-0.14.0.dev
-    (
-        "torchao-testing/opt-125m-Int8DynamicActivationIntxWeightConfig-v1-0.14.0.dev",
-        1,
-        "Int8DynamicActivationIntxWeightConfig",
-    ),
 ]
 
 _SINGLE_LINEAR_MODEL_INFO = [
@@ -4,7 +4,6 @@
 # This source code is licensed under the BSD 3-Clause license found in the
 # LICENSE file in the root directory of this source tree.
 
-import copy
 import tempfile
 import unittest
 
@@ -16,7 +15,6 @@
     run_tests,
 )
 
-from torchao.dtypes import QDQLayout
 from torchao.quantization import (
     Int8DynamicActivationIntxWeightConfig,
     IntxWeightOnlyConfig,
@@ -409,84 +407,6 @@ def test_qat_int8_dyn_act_intx_weight_config(
sqnr > 35, f"Got SQNR of {sqnr} between prepared and quantized"
)

-    @parameterized.expand(
-        [
-            param(
-                weight_dtype=weight_dtype,
-                group_size=group_size,
-                mapping_type=mapping_type,
-                act_mapping_type=act_mapping_type,
-                scale_dtype=scale_dtype,
-                model_dtype=model_dtype,
-            )
-            for weight_dtype in list(getattr(torch, f"int{x}") for x in range(1, 9))
-            for group_size in [32, 64, 128]
-            for mapping_type in [MappingType.SYMMETRIC]
-            for act_mapping_type in [MappingType.ASYMMETRIC]
-            for scale_dtype in [torch.float32, torch.bfloat16, torch.float16]
-            for model_dtype in [torch.float32, torch.bfloat16, torch.float16]
-        ],
-        name_func=lambda f, _, params: f.__name__ + f"_{params.kwargs}",
-    )
-    def test_intx_unpacked_v2_is_close_to_qdq_v1(
-        self,
-        weight_dtype,
-        group_size,
-        mapping_type,
-        act_mapping_type,
-        scale_dtype,
-        model_dtype,
-    ):
-        k0 = 512
-        k1 = 256
-        layers = [
-            torch.nn.Linear(k0, k1),
-        ]
-        model = torch.nn.Sequential(*layers)
-        activations = torch.randn(
-            k0,
-        )
-
-        model = model.to(model_dtype)
-        activations = activations.to(model_dtype)
-
-        model_v1 = copy.deepcopy(model)
-        quantize_(
-            model_v1,
-            Int8DynamicActivationIntxWeightConfig(
-                weight_dtype=weight_dtype,
-                weight_granularity=PerGroup(group_size),
-                weight_mapping_type=mapping_type,
-                weight_scale_dtype=scale_dtype,
-                act_mapping_type=act_mapping_type,
-                version=1,
-                layout=QDQLayout(),
-            ),
-        )
-        out_v1 = model_v1(activations)
-
-        quantize_(
-            model,
-            Int8DynamicActivationIntxWeightConfig(
-                weight_dtype=weight_dtype,
-                weight_granularity=PerGroup(group_size),
-                weight_mapping_type=mapping_type,
-                weight_scale_dtype=scale_dtype,
-                act_mapping_type=act_mapping_type,
-                intx_packing_format=IntxPackingFormat.UNPACKED_TO_INT8,
-                version=2,
-            ),
-        )
-        out_v2 = model(activations)
-        sqnr = compute_error(out_v1, out_v2).item()
-
-        if scale_dtype == torch.float32 and model_dtype == torch.float32:
-            self.assertTrue(sqnr == float("inf"), f"Got SQNR of {sqnr}")
-        else:
-            # There is a slight difference in how v2 does dynamic activation quantization:
-            # it uses the model_dtype, whereas v1 always uses float32.
-            self.assertTrue(sqnr > 35, f"Got SQNR of {sqnr}")
-
 
 if __name__ == "__main__":
     run_tests()
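Note on the deleted comparison test: its final comment explains why exact equality (infinite SQNR) was only asserted for float32 models. Here is a minimal sketch of that effect, assuming a simple asymmetric int8 quant-dequant; the helper below is hypothetical, not torchao's actual kernel, and only the scale/zero-point compute dtype differs between the two calls:

```python
import torch

def dyn_act_qdq(x: torch.Tensor, compute_dtype: torch.dtype) -> torch.Tensor:
    # Hypothetical asymmetric int8 dynamic quant-dequant of activations;
    # scale and zero-point are computed in `compute_dtype`.
    xc = x.to(compute_dtype)
    qmin, qmax = -128, 127
    scale = (xc.max() - xc.min()) / (qmax - qmin)
    zero_point = torch.round(qmin - xc.min() / scale)
    q = torch.clamp(torch.round(xc / scale + zero_point), qmin, qmax)
    return ((q - zero_point) * scale).to(x.dtype)

x = torch.randn(512, dtype=torch.bfloat16)
v1_style = dyn_act_qdq(x, torch.float32)   # v1 behavior: always computes in float32
v2_style = dyn_act_qdq(x, torch.bfloat16)  # v2 behavior: computes in the model dtype
print((v1_style - v2_style).abs().max())   # small but generally nonzero
```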