|
4 | 4 | # This source code is licensed under the BSD 3-Clause license found in the |
5 | 5 | # LICENSE file in the root directory of this source tree. |
6 | 6 |
|
7 | | -import copy |
8 | 7 | import tempfile |
9 | 8 | import unittest |
10 | 9 |
|
|
16 | 15 | run_tests, |
17 | 16 | ) |
18 | 17 |
|
19 | | -from torchao.dtypes import QDQLayout |
20 | 18 | from torchao.quantization import ( |
21 | 19 | Int8DynamicActivationIntxWeightConfig, |
22 | 20 | IntxWeightOnlyConfig, |
@@ -409,84 +407,6 @@ def test_qat_int8_dyn_act_intx_weight_config( |
409 | 407 | sqnr > 35, f"Got SQNR of {sqnr} between prepared and quantized" |
410 | 408 | ) |
411 | 409 |
|
@parameterized.expand(
    [
        param(
            weight_dtype=weight_dtype,
            group_size=group_size,
            mapping_type=mapping_type,
            act_mapping_type=act_mapping_type,
            scale_dtype=scale_dtype,
            model_dtype=model_dtype,
        )
        # int1..int8 weight dtypes, crossed with group sizes and dtypes.
        for weight_dtype in [getattr(torch, f"int{x}") for x in range(1, 9)]
        for group_size in [32, 64, 128]
        for mapping_type in [MappingType.SYMMETRIC]
        for act_mapping_type in [MappingType.ASYMMETRIC]
        for scale_dtype in [torch.float32, torch.bfloat16, torch.float16]
        for model_dtype in [torch.float32, torch.bfloat16, torch.float16]
    ],
    name_func=lambda f, _, params: f.__name__ + f"_{params.kwargs}",
)
def test_intx_unpacked_v2_is_close_to_qdq_v1(
    self,
    weight_dtype,
    group_size,
    mapping_type,
    act_mapping_type,
    scale_dtype,
    model_dtype,
):
    """Check that version=2 quantization (UNPACKED_TO_INT8 packing) of a
    single Linear layer is numerically close to version=1 (QDQLayout)
    across intx weight dtypes, group sizes, scale dtypes and model dtypes.

    When everything runs in float32 the two paths are expected to match
    exactly (infinite SQNR); otherwise only closeness (SQNR > 35 dB) is
    required, because v2 quantizes dynamic activations in model_dtype
    whereas v1 always uses float32.
    """
    k0 = 512  # in_features
    k1 = 256  # out_features
    model = torch.nn.Sequential(torch.nn.Linear(k0, k1)).to(model_dtype)
    activations = torch.randn(k0).to(model_dtype)

    # Quantize a deep copy along the v1 (QDQLayout) path.
    model_v1 = copy.deepcopy(model)
    quantize_(
        model_v1,
        Int8DynamicActivationIntxWeightConfig(
            weight_dtype=weight_dtype,
            weight_granularity=PerGroup(group_size),
            weight_mapping_type=mapping_type,
            weight_scale_dtype=scale_dtype,
            act_mapping_type=act_mapping_type,
            version=1,
            layout=QDQLayout(),
        ),
    )
    out_v1 = model_v1(activations)

    # Quantize the original model along the v2 (unpacked int8) path.
    quantize_(
        model,
        Int8DynamicActivationIntxWeightConfig(
            weight_dtype=weight_dtype,
            weight_granularity=PerGroup(group_size),
            weight_mapping_type=mapping_type,
            weight_scale_dtype=scale_dtype,
            act_mapping_type=act_mapping_type,
            intx_packing_format=IntxPackingFormat.UNPACKED_TO_INT8,
            version=2,
        ),
    )
    out_v2 = model(activations)
    sqnr = compute_error(out_v1, out_v2).item()

    # Bugfix: the original condition was
    #   `model_dtype == torch.float32 and model_dtype == torch.float32`
    # (same operand twice). Exact equality should only be demanded when
    # BOTH the weight-scale dtype and the model dtype are float32; the
    # corrected conjunction below is strictly conservative — any case
    # that passed the old exact-match check (inf > 35) still passes.
    if scale_dtype == torch.float32 and model_dtype == torch.float32:
        self.assertTrue(sqnr == float("inf"), f"Got SQNR of {sqnr}")
    else:
        # There is slight difference in how v2 does dynamic activation
        # quantization: it uses the model_dtype, whereas v1 always uses
        # float32, so allow a small mismatch here.
        self.assertTrue(sqnr > 35, f"Got SQNR of {sqnr}")
490 | 410 |
|
if __name__ == "__main__":
    # Entry point: dispatch to PyTorch's common test runner.
    run_tests()
0 commit comments