Commit 51bba65

deprecate v1 of Int8DynamicActivationIntxWeightConfig
Summary: Deprecating v1 of `Int8DynamicActivationIntxWeightConfig` and deleting all callsites.

Test Plan: CI

ghstack-source-id: 4175ed5
ghstack-comment-id: 3670561548
Pull-Request: #3511
Parent: fc8a635
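For downstream users of the removed v1 path, a minimal migration sketch. This is an assumption-laden illustration, not code from this commit: the model, weight dtype, and group size are made up, and the import location of `IntxPackingFormat` may differ across torchao versions.

import torch

from torchao.quantization import (
    Int8DynamicActivationIntxWeightConfig,
    PerGroup,
    quantize_,
)
# Assumed import path; check where IntxPackingFormat lives in your torchao version.
from torchao.quantization import IntxPackingFormat

# Illustrative model; any module with Linear layers works the same way.
model = torch.nn.Sequential(torch.nn.Linear(512, 256))

# Before (v1, removed by this commit):
#   Int8DynamicActivationIntxWeightConfig(..., version=1, layout=QDQLayout())
# After (v2), mirroring the config exercised in the deleted comparison test:
quantize_(
    model,
    Int8DynamicActivationIntxWeightConfig(
        weight_dtype=torch.int4,  # the deleted test swept torch.int1..int8
        weight_granularity=PerGroup(32),
        intx_packing_format=IntxPackingFormat.UNPACKED_TO_INT8,
        version=2,
    ),
)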

File tree

4 files changed (+34, -922 lines)


test/quantization/quantize_/workflows/intx/test_intx_unpacked_to_int8_tensor.py

Lines changed: 0 additions & 80 deletions
@@ -4,7 +4,6 @@
 # This source code is licensed under the BSD 3-Clause license found in the
 # LICENSE file in the root directory of this source tree.
 
-import copy
 import tempfile
 import unittest
 
@@ -16,7 +15,6 @@
     run_tests,
 )
 
-from torchao.dtypes import QDQLayout
 from torchao.quantization import (
     Int8DynamicActivationIntxWeightConfig,
     IntxWeightOnlyConfig,
@@ -409,84 +407,6 @@ def test_qat_int8_dyn_act_intx_weight_config(
             sqnr > 35, f"Got SQNR of {sqnr} between prepared and quantized"
         )
 
-    @parameterized.expand(
-        [
-            param(
-                weight_dtype=weight_dtype,
-                group_size=group_size,
-                mapping_type=mapping_type,
-                act_mapping_type=act_mapping_type,
-                scale_dtype=scale_dtype,
-                model_dtype=model_dtype,
-            )
-            for weight_dtype in list(getattr(torch, f"int{x}") for x in range(1, 9))
-            for group_size in [32, 64, 128]
-            for mapping_type in [MappingType.SYMMETRIC]
-            for act_mapping_type in [MappingType.ASYMMETRIC]
-            for scale_dtype in [torch.float32, torch.bfloat16, torch.float16]
-            for model_dtype in [torch.float32, torch.bfloat16, torch.float16]
-        ],
-        name_func=lambda f, _, params: f.__name__ + f"_{params.kwargs}",
-    )
-    def test_intx_unpacked_v2_is_close_to_qdq_v1(
-        self,
-        weight_dtype,
-        group_size,
-        mapping_type,
-        act_mapping_type,
-        scale_dtype,
-        model_dtype,
-    ):
-        k0 = 512
-        k1 = 256
-        layers = [
-            torch.nn.Linear(k0, k1),
-        ]
-        model = torch.nn.Sequential(*layers)
-        activations = torch.randn(
-            k0,
-        )
-
-        model = model.to(model_dtype)
-        activations = activations.to(model_dtype)
-
-        model_v1 = copy.deepcopy(model)
-        quantize_(
-            model_v1,
-            Int8DynamicActivationIntxWeightConfig(
-                weight_dtype=weight_dtype,
-                weight_granularity=PerGroup(group_size),
-                weight_mapping_type=mapping_type,
-                weight_scale_dtype=scale_dtype,
-                act_mapping_type=act_mapping_type,
-                version=1,
-                layout=QDQLayout(),
-            ),
-        )
-        out_v1 = model_v1(activations)
-
-        quantize_(
-            model,
-            Int8DynamicActivationIntxWeightConfig(
-                weight_dtype=weight_dtype,
-                weight_granularity=PerGroup(group_size),
-                weight_mapping_type=mapping_type,
-                weight_scale_dtype=scale_dtype,
-                act_mapping_type=act_mapping_type,
-                intx_packing_format=IntxPackingFormat.UNPACKED_TO_INT8,
-                version=2,
-            ),
-        )
-        out_v2 = model(activations)
-        sqnr = compute_error(out_v1, out_v2).item()
-
-        if model_dtype == torch.float32 and model_dtype == torch.float32:
-            self.assertTrue(sqnr == float("inf"), f"Got SQNR of {sqnr}")
-        else:
-            # There is slight difference in how v2 does dynamic activation quantization
-            # It uses the model_dtype, whereas v1 always uses float32
-            self.assertTrue(sqnr > 35, f"Got SQNR of {sqnr}")
-
 
 if __name__ == "__main__":
     run_tests()
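The deleted test gated its assertions on `compute_error`, which reports SQNR in decibels. As a reference for the `sqnr > 35` thresholds above, here is a minimal sketch of that metric; `sqnr_db` is a hypothetical helper written for illustration, not torchao's implementation.

import torch

def sqnr_db(ref: torch.Tensor, test: torch.Tensor) -> float:
    # Signal-to-quantization-noise ratio in dB: reference power over error power.
    # Identical outputs divide by zero noise and give +inf, which is why the
    # float32 case above asserted sqnr == float("inf"); the mixed-dtype cases,
    # where v2 quantizes activations in model_dtype rather than float32,
    # only needed to clear 35 dB.
    noise = ref - test
    return (10.0 * torch.log10(ref.pow(2).sum() / noise.pow(2).sum())).item()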
