Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -23,4 +23,4 @@ save*
*.pid
*.ipynb*
.venv/
*.sh
*.sh
Original file line number Diff line number Diff line change
Expand Up @@ -44,4 +44,4 @@ save:
save_lightllm_kv_calib: True
lightllm_kv_cache_name: kv_cache_calib.json
save_fake: False
save_path: /path/to/save/
save_path: /path/to/save/
7 changes: 3 additions & 4 deletions llmc/__main__.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,9 +18,9 @@
from llmc.data import BaseDataset
from llmc.eval.utils import eval_model, get_eval_list
from llmc.models import *
from llmc.utils import (check_config, deploy_all_modality, get_modality,
mkdirs, print_important_package_version, seed_all,
collect_lightllm_kv_calib_json,
from llmc.utils import (check_config, collect_lightllm_kv_calib_json,
deploy_all_modality, get_modality, mkdirs,
print_important_package_version, seed_all,
update_autoawq_quant_config,
update_lightx2v_quant_config, update_vllm_quant_config)
from llmc.utils.registry_factory import ALGO_REGISTRY, MODEL_REGISTRY
Expand Down Expand Up @@ -290,4 +290,3 @@ def main(config):
llmc_duration_time = llmc_end_time - llmc_start_time
logger.info(f'llmc_duration_time: {llmc_duration_time} s')
logger.info('--- llmc finished ---')

4 changes: 3 additions & 1 deletion llmc/compression/quantization/kvquant.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
import copy

import torch
from loguru import logger
from transformers import DynamicCache
Expand All @@ -13,7 +14,8 @@ class NaiveQuantKVCache(DynamicCache):
def __init__(self, quant_type, kvquant_cfg, num_hidden_layers, num_samples=128, bsz=1):
super().__init__()

# Copy the config to avoid mutating the original quantization config in static KV calibration.
# Copy the config to avoid mutating the original quantization
# config in static KV calibration.
kvquant_cfg = copy.deepcopy(kvquant_cfg)
assert kvquant_cfg.granularity in ['per_token', 'per_tensor', 'per_group', 'per_head']
self.num_hidden_layers, self.num_samples, self.bsz = (
Expand Down
5 changes: 3 additions & 2 deletions llmc/models/mixtral.py
Original file line number Diff line number Diff line change
Expand Up @@ -59,7 +59,8 @@ def get_subsets_in_block(self, block):
return self._get_subsets_fused(block)

def _get_subsets_legacy(self, block):
"""transformers <5.0: block.block_sparse_moe with ModuleList experts."""
"""Transformers <5.0: block.block_sparse_moe with ModuleList
experts."""
moe = block.block_sparse_moe
return [
{
Expand Down Expand Up @@ -106,7 +107,7 @@ def _get_subsets_legacy(self, block):
]

def _get_subsets_fused(self, block):
"""transformers >=5.0: block.mlp with fused MixtralExperts."""
"""Transformers >=5.0: block.mlp with fused MixtralExperts."""
moe = block.mlp
return [
{
Expand Down
Loading