ModelTC · gushiqiao · Apr 1, 2026 · Mar 31, 2026 · Mar 31, 2026 · Mar 31, 2026
diff --git a/.gitignore b/.gitignore
@@ -23,4 +23,4 @@ save*
 *.pid
 *.ipynb*
 .venv/
-*.sh
+*.sh
diff --git a/configs/quantization/methods/KVQuant/rtn_w_a_pertensor_static_naive_quant_kv.yml b/configs/quantization/methods/KVQuant/rtn_w_a_pertensor_static_naive_quant_kv.yml
@@ -44,4 +44,4 @@ save:
     save_lightllm_kv_calib: True
     lightllm_kv_cache_name: kv_cache_calib.json
     save_fake: False
-    save_path: /path/to/save/
+    save_path: /path/to/save/
diff --git a/llmc/__main__.py b/llmc/__main__.py
@@ -18,9 +18,9 @@
 from llmc.data import BaseDataset
 from llmc.eval.utils import eval_model, get_eval_list
 from llmc.models import *
-from llmc.utils import (check_config, deploy_all_modality, get_modality,
-                        mkdirs, print_important_package_version, seed_all,
-                        collect_lightllm_kv_calib_json,
+from llmc.utils import (check_config, collect_lightllm_kv_calib_json,
+                        deploy_all_modality, get_modality, mkdirs,
+                        print_important_package_version, seed_all,
                         update_autoawq_quant_config,
                         update_lightx2v_quant_config, update_vllm_quant_config)
 from llmc.utils.registry_factory import ALGO_REGISTRY, MODEL_REGISTRY
@@ -290,4 +290,3 @@ def main(config):
     llmc_duration_time = llmc_end_time - llmc_start_time
     logger.info(f'llmc_duration_time: {llmc_duration_time} s')
     logger.info('--- llmc finished ---')
-
diff --git a/llmc/compression/quantization/kvquant.py b/llmc/compression/quantization/kvquant.py
@@ -1,4 +1,5 @@
 import copy
+
 import torch
 from loguru import logger
 from transformers import DynamicCache
@@ -13,7 +14,8 @@ class NaiveQuantKVCache(DynamicCache):
     def __init__(self, quant_type, kvquant_cfg, num_hidden_layers, num_samples=128, bsz=1):
         super().__init__()
 
-        # Copy the config to avoid mutating the original quantization config in static KV calibration.
+        # Copy the config to avoid mutating the original quantization
+        # config in static KV calibration.
         kvquant_cfg = copy.deepcopy(kvquant_cfg)
         assert kvquant_cfg.granularity in ['per_token', 'per_tensor', 'per_group', 'per_head']
         self.num_hidden_layers, self.num_samples, self.bsz = (

diff --git a/llmc/models/mixtral.py b/llmc/models/mixtral.py
@@ -59,7 +59,8 @@ def get_subsets_in_block(self, block):
         return self._get_subsets_fused(block)
 
     def _get_subsets_legacy(self, block):
-        """transformers <5.0: block.block_sparse_moe with ModuleList experts."""
+        """Transformers <5.0: block.block_sparse_moe with ModuleList
+        experts."""
         moe = block.block_sparse_moe
         return [
             {
@@ -106,7 +107,7 @@ def _get_subsets_legacy(self, block):
         ]
 
     def _get_subsets_fused(self, block):
-        """transformers >=5.0: block.mlp with fused MixtralExperts."""
+        """Transformers >=5.0: block.mlp with fused MixtralExperts."""
         moe = block.mlp
         return [
             {
diff --git a/.gitignore b/.gitignore
@@ -23,4 +23,4 @@ save*
 *.pid
 *.ipynb*
 .venv/
-*.sh
+*.sh