
Commit 58dad9b

port more cases in test_quant_api.py to xpu

1 parent: 269764c

2 files changed (+12, −12 lines)

test/quantization/test_quant_api.py

Lines changed: 11 additions & 11 deletions

@@ -647,20 +647,20 @@ def test_module_fqn_to_config_module_name(self):
         assert isinstance(model.linear2.weight, AffineQuantizedTensor)
         assert isinstance(model.linear2.weight._layout, PlainLayout)
 
-    @unittest.skipIf(not torch.cuda.is_available(), "Need CUDA available")
+    @unittest.skipIf(not torch.accelerator.is_available(), "Need GPU available")
     def test_module_fqn_to_config_regex_basic(self):
         config1 = Int4WeightOnlyConfig(
             group_size=32, int4_packing_format="tile_packed_to_4d"
         )
         config = ModuleFqnToConfig({"re:linear.": config1})
-        model = ToyLinearModel().cuda().to(dtype=torch.bfloat16)
-        example_inputs = model.example_inputs(device="cuda", dtype=torch.bfloat16)
+        model = ToyLinearModel().to(_DEVICE).to(dtype=torch.bfloat16)
+        example_inputs = model.example_inputs(device=_DEVICE, dtype=torch.bfloat16)
         quantize_(model, config, filter_fn=None)
         model(*example_inputs)
         assert isinstance(model.linear1.weight, Int4TilePackedTo4dTensor)
         assert isinstance(model.linear2.weight, Int4TilePackedTo4dTensor)
 
-    @unittest.skipIf(not torch.cuda.is_available(), "Need CUDA available")
+    @unittest.skipIf(not torch.accelerator.is_available(), "Need GPU available")
     def test_module_fqn_to_config_regex_precedence(self):
         """Testing that full path config takes precedence over
         regex config in ModuleFqnToConfig

@@ -670,14 +670,14 @@ def test_module_fqn_to_config_regex_precedence(self):
         )
         config2 = IntxWeightOnlyConfig()
         config = ModuleFqnToConfig({"linear1": config1, "re:linear.": config2})
-        model = ToyLinearModel().cuda().to(dtype=torch.bfloat16)
-        example_inputs = model.example_inputs(device="cuda", dtype=torch.bfloat16)
+        model = ToyLinearModel().to(_DEVICE).to(dtype=torch.bfloat16)
+        example_inputs = model.example_inputs(device=_DEVICE, dtype=torch.bfloat16)
         quantize_(model, config, filter_fn=None)
         model(*example_inputs)
         assert isinstance(model.linear1.weight, Int4TilePackedTo4dTensor)
         assert isinstance(model.linear2.weight, IntxUnpackedToInt8Tensor)
 
-    @unittest.skipIf(not torch.cuda.is_available(), "Need CUDA available")
+    @unittest.skipIf(not torch.accelerator.is_available(), "Need GPU available")
     def test_module_fqn_to_config_regex_precedence2(self):
         """Testing that full path config takes precedence over
         regex config in ModuleFqnToConfig, swapping

@@ -689,14 +689,14 @@ def test_module_fqn_to_config_regex_precedence2(self):
         )
         config2 = IntxWeightOnlyConfig()
         config = ModuleFqnToConfig({"re:linear.": config2, "linear1": config1})
-        model = ToyLinearModel().cuda().to(dtype=torch.bfloat16)
-        example_inputs = model.example_inputs(device="cuda", dtype=torch.bfloat16)
+        model = ToyLinearModel().to(_DEVICE).to(dtype=torch.bfloat16)
+        example_inputs = model.example_inputs(device=_DEVICE, dtype=torch.bfloat16)
         quantize_(model, config, filter_fn=None)
         model(*example_inputs)
         assert isinstance(model.linear1.weight, Int4TilePackedTo4dTensor)
         assert isinstance(model.linear2.weight, IntxUnpackedToInt8Tensor)
 
-    @unittest.skipIf(not torch.cuda.is_available(), "Need CUDA available")
+    @unittest.skipIf(not torch.accelerator.is_available(), "Need GPU available")
     def test_module_fqn_to_config_regex_fullmatch(self):
         """Testing that we will only match the fqns that fully
         matches the regex

@@ -735,7 +735,7 @@ def example_inputs(self):
                 "linear3_full_match.bias": None,
             }
         )
-        model = M(dtype=torch.bfloat16, device="cuda")
+        model = M(dtype=torch.bfloat16, device=_DEVICE)
         example_inputs = model.example_inputs()
         quantize_(model, config, filter_fn=None)
         model(*example_inputs)
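
Note: the tests now reference a module-level _DEVICE constant instead of hard-coding "cuda". Its definition is not part of this diff; a minimal sketch of how such a helper could look with the torch.accelerator API (available in recent PyTorch releases), assuming a CPU fallback when no accelerator is present:

import torch

# Hypothetical helper, not shown in this diff: resolve the test device once.
# torch.accelerator covers CUDA, XPU, and other backends uniformly, which is
# what lets the skipIf decorators above use torch.accelerator.is_available().
_DEVICE = (
    torch.accelerator.current_accelerator().type
    if torch.accelerator.is_available()
    else "cpu"
)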

torchao/quantization/quantize_/workflows/int4/int4_tile_packed_to_4d_tensor.py

Lines changed: 1 addition & 1 deletion

@@ -120,7 +120,7 @@ def from_hp(
         # Validate kernel requirements
         orig_out_features, orig_in_features = hp_tensor.shape[-2:]
         # TODO: relax checks to enable quantizing in other platforms and run in A100
-        if not torch.cuda.get_device_capability()[0] >= 8:
+        if torch.cuda.is_available() and not torch.cuda.get_device_capability()[0] >= 8:
             raise ValueError(
                 f"Cannot use tinygemm int4 kernel with a device of compute capability {torch.cuda.get_device_capability()}, the minimum compute capability is 8.0 for tensor core kernels."
             )
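
The added torch.cuda.is_available() short-circuit keeps the compute-capability floor (8.0, i.e. Ampere or newer) for CUDA devices while letting XPU and other non-CUDA accelerators reach this code path; previously, calling this on a machine without CUDA would fail inside torch.cuda.get_device_capability(). A standalone sketch of the same guard pattern (check_tinygemm_capability is a hypothetical name for illustration):

import torch

# Only enforce the CUDA compute-capability requirement when CUDA is
# actually present; on XPU or CPU the check is skipped entirely.
def check_tinygemm_capability(min_major: int = 8) -> None:
    if torch.cuda.is_available() and torch.cuda.get_device_capability()[0] < min_major:
        raise ValueError(
            f"Cannot use tinygemm int4 kernel with compute capability "
            f"{torch.cuda.get_device_capability()}; minimum is {min_major}.0"
        )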
